Models
Docs
Pricing
Sign in
Download
Models
Download
Docs
Pricing
Sign in
qwen3.6
:35b-a3b-mtp-bf16
1.9M
Downloads
Updated
5 hours ago
Qwen3.6 delivers substantial upgrades in agentic coding and thinking preservation over previous Qwen models.
Qwen3.6 delivers substantial upgrades in agentic coding and thinking preservation over previous Qwen models.
Cancel
vision
tools
thinking
27b
35b
qwen3.6:35b-a3b-mtp-bf16
...
/
model
394a99ef6e5d · 71GB
Metadata
general.architecture
qwen35moe
qwen35moe
general.file_type
F16
F16
qwen35moe.attention.head_count
16
16
qwen35moe.attention.head_count_kv
2
2
qwen35moe.attention.key_length
256
256
qwen35moe.attention.layer_norm_rms_epsilon
1e-06
1e-06
qwen35moe.attention.value_length
256
256
qwen35moe.block_count
41
41
qwen35moe.context_length
262144
262144
qwen35moe.embedding_length
2048
2048
qwen35moe.expert_count
256
256
qwen35moe.expert_feed_forward_length
512
512
qwen35moe.expert_shared_feed_forward_length
512
512
qwen35moe.expert_used_count
8
8
qwen35moe.feed_forward_length
0
0
qwen35moe.full_attention_interval
4
4
qwen35moe.nextn_predict_layers
1
1
qwen35moe.rope.dimension_count
64
64
qwen35moe.rope.dimension_sections
[11, 11, 10, 0]
[11, 11, 10, 0]
qwen35moe.rope.freq_base
1e+07
1e+07
qwen35moe.ssm.conv_kernel
4
4
qwen35moe.ssm.group_count
16
16
qwen35moe.ssm.inner_size
4096
4096
qwen35moe.ssm.state_size
128
128
qwen35moe.ssm.time_step_rank
32
32
tokenizer.ggml.eos_token_id
248046
248046
tokenizer.ggml.eos_token_ids
[248046, 248044]
[248046, 248044]
tokenizer.ggml.merges
[Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
[Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
248044
248044
tokenizer.ggml.pre
qwen35
qwen35
tokenizer.ggml.scores
[0, 1, 2, 3, 4, ...]
[0, 1, 2, 3, 4, ...]
tokenizer.ggml.token_type
[1, 1, 1, 1, 1, ...]
[1, 1, 1, 1, 1, ...]
tokenizer.ggml.tokens
[!, ", #, $, %, ...]
[!, ", #, $, %, ...]
Tensor
Name
Type
Shape
token_embd.weight
BF16
BF16
[2048, 248320]
blk.0
blk.0.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.0.attn_norm.weight
F32
F32
[2048]
blk.0.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.0.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.0.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.0.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.0.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.0.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.0.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.0.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.0.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.0.post_attention_norm.weight
F32
F32
[2048]
blk.0.ssm_a
F32
F32
[32]
blk.0.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.0.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.0.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.0.ssm_dt.bias
F32
F32
[32]
blk.0.ssm_norm.weight
F32
F32
[128]
blk.0.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.1
blk.1.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.1.attn_norm.weight
F32
F32
[2048]
blk.1.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.1.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.1.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.1.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.1.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.1.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.1.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.1.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.1.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.1.post_attention_norm.weight
F32
F32
[2048]
blk.1.ssm_a
F32
F32
[32]
blk.1.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.1.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.1.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.1.ssm_dt.bias
F32
F32
[32]
blk.1.ssm_norm.weight
F32
F32
[128]
blk.1.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.2
blk.2.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.2.attn_norm.weight
F32
F32
[2048]
blk.2.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.2.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.2.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.2.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.2.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.2.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.2.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.2.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.2.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.2.post_attention_norm.weight
F32
F32
[2048]
blk.2.ssm_a
F32
F32
[32]
blk.2.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.2.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.2.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.2.ssm_dt.bias
F32
F32
[32]
blk.2.ssm_norm.weight
F32
F32
[128]
blk.2.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.3
blk.3.attn_k.weight
BF16
BF16
[2048, 512]
blk.3.attn_k_norm.weight
F32
F32
[256]
blk.3.attn_norm.weight
F32
F32
[2048]
blk.3.attn_output.weight
BF16
BF16
[4096, 2048]
blk.3.attn_q.weight
BF16
BF16
[2048, 8192]
blk.3.attn_q_norm.weight
F32
F32
[256]
blk.3.attn_v.weight
BF16
BF16
[2048, 512]
blk.3.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.3.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.3.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.3.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.3.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.3.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.3.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.3.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.3.post_attention_norm.weight
F32
F32
[2048]
blk.4
blk.4.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.4.attn_norm.weight
F32
F32
[2048]
blk.4.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.4.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.4.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.4.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.4.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.4.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.4.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.4.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.4.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.4.post_attention_norm.weight
F32
F32
[2048]
blk.4.ssm_a
F32
F32
[32]
blk.4.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.4.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.4.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.4.ssm_dt.bias
F32
F32
[32]
blk.4.ssm_norm.weight
F32
F32
[128]
blk.4.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.5
blk.5.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.5.attn_norm.weight
F32
F32
[2048]
blk.5.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.5.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.5.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.5.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.5.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.5.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.5.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.5.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.5.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.5.post_attention_norm.weight
F32
F32
[2048]
blk.5.ssm_a
F32
F32
[32]
blk.5.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.5.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.5.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.5.ssm_dt.bias
F32
F32
[32]
blk.5.ssm_norm.weight
F32
F32
[128]
blk.5.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.6
blk.6.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.6.attn_norm.weight
F32
F32
[2048]
blk.6.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.6.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.6.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.6.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.6.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.6.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.6.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.6.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.6.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.6.post_attention_norm.weight
F32
F32
[2048]
blk.6.ssm_a
F32
F32
[32]
blk.6.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.6.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.6.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.6.ssm_dt.bias
F32
F32
[32]
blk.6.ssm_norm.weight
F32
F32
[128]
blk.6.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.7
blk.7.attn_k.weight
BF16
BF16
[2048, 512]
blk.7.attn_k_norm.weight
F32
F32
[256]
blk.7.attn_norm.weight
F32
F32
[2048]
blk.7.attn_output.weight
BF16
BF16
[4096, 2048]
blk.7.attn_q.weight
BF16
BF16
[2048, 8192]
blk.7.attn_q_norm.weight
F32
F32
[256]
blk.7.attn_v.weight
BF16
BF16
[2048, 512]
blk.7.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.7.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.7.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.7.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.7.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.7.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.7.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.7.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.7.post_attention_norm.weight
F32
F32
[2048]
blk.8
blk.8.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.8.attn_norm.weight
F32
F32
[2048]
blk.8.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.8.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.8.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.8.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.8.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.8.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.8.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.8.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.8.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.8.post_attention_norm.weight
F32
F32
[2048]
blk.8.ssm_a
F32
F32
[32]
blk.8.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.8.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.8.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.8.ssm_dt.bias
F32
F32
[32]
blk.8.ssm_norm.weight
F32
F32
[128]
blk.8.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.9
blk.9.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.9.attn_norm.weight
F32
F32
[2048]
blk.9.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.9.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.9.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.9.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.9.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.9.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.9.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.9.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.9.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.9.post_attention_norm.weight
F32
F32
[2048]
blk.9.ssm_a
F32
F32
[32]
blk.9.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.9.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.9.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.9.ssm_dt.bias
F32
F32
[32]
blk.9.ssm_norm.weight
F32
F32
[128]
blk.9.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.10
blk.10.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.10.attn_norm.weight
F32
F32
[2048]
blk.10.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.10.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.10.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.10.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.10.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.10.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.10.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.10.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.10.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.10.post_attention_norm.weight
F32
F32
[2048]
blk.10.ssm_a
F32
F32
[32]
blk.10.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.10.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.10.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.10.ssm_dt.bias
F32
F32
[32]
blk.10.ssm_norm.weight
F32
F32
[128]
blk.10.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.11
blk.11.attn_k.weight
BF16
BF16
[2048, 512]
blk.11.attn_k_norm.weight
F32
F32
[256]
blk.11.attn_norm.weight
F32
F32
[2048]
blk.11.attn_output.weight
BF16
BF16
[4096, 2048]
blk.11.attn_q.weight
BF16
BF16
[2048, 8192]
blk.11.attn_q_norm.weight
F32
F32
[256]
blk.11.attn_v.weight
BF16
BF16
[2048, 512]
blk.11.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.11.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.11.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.11.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.11.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.11.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.11.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.11.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.11.post_attention_norm.weight
F32
F32
[2048]
blk.12
blk.12.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.12.attn_norm.weight
F32
F32
[2048]
blk.12.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.12.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.12.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.12.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.12.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.12.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.12.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.12.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.12.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.12.post_attention_norm.weight
F32
F32
[2048]
blk.12.ssm_a
F32
F32
[32]
blk.12.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.12.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.12.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.12.ssm_dt.bias
F32
F32
[32]
blk.12.ssm_norm.weight
F32
F32
[128]
blk.12.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.13
blk.13.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.13.attn_norm.weight
F32
F32
[2048]
blk.13.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.13.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.13.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.13.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.13.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.13.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.13.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.13.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.13.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.13.post_attention_norm.weight
F32
F32
[2048]
blk.13.ssm_a
F32
F32
[32]
blk.13.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.13.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.13.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.13.ssm_dt.bias
F32
F32
[32]
blk.13.ssm_norm.weight
F32
F32
[128]
blk.13.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.14
blk.14.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.14.attn_norm.weight
F32
F32
[2048]
blk.14.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.14.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.14.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.14.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.14.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.14.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.14.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.14.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.14.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.14.post_attention_norm.weight
F32
F32
[2048]
blk.14.ssm_a
F32
F32
[32]
blk.14.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.14.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.14.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.14.ssm_dt.bias
F32
F32
[32]
blk.14.ssm_norm.weight
F32
F32
[128]
blk.14.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.15
blk.15.attn_k.weight
BF16
BF16
[2048, 512]
blk.15.attn_k_norm.weight
F32
F32
[256]
blk.15.attn_norm.weight
F32
F32
[2048]
blk.15.attn_output.weight
BF16
BF16
[4096, 2048]
blk.15.attn_q.weight
BF16
BF16
[2048, 8192]
blk.15.attn_q_norm.weight
F32
F32
[256]
blk.15.attn_v.weight
BF16
BF16
[2048, 512]
blk.15.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.15.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.15.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.15.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.15.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.15.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.15.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.15.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.15.post_attention_norm.weight
F32
F32
[2048]
blk.16
blk.16.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.16.attn_norm.weight
F32
F32
[2048]
blk.16.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.16.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.16.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.16.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.16.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.16.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.16.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.16.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.16.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.16.post_attention_norm.weight
F32
F32
[2048]
blk.16.ssm_a
F32
F32
[32]
blk.16.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.16.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.16.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.16.ssm_dt.bias
F32
F32
[32]
blk.16.ssm_norm.weight
F32
F32
[128]
blk.16.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.17
blk.17.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.17.attn_norm.weight
F32
F32
[2048]
blk.17.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.17.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.17.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.17.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.17.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.17.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.17.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.17.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.17.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.17.post_attention_norm.weight
F32
F32
[2048]
blk.17.ssm_a
F32
F32
[32]
blk.17.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.17.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.17.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.17.ssm_dt.bias
F32
F32
[32]
blk.17.ssm_norm.weight
F32
F32
[128]
blk.17.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.18
blk.18.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.18.attn_norm.weight
F32
F32
[2048]
blk.18.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.18.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.18.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.18.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.18.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.18.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.18.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.18.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.18.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.18.post_attention_norm.weight
F32
F32
[2048]
blk.18.ssm_a
F32
F32
[32]
blk.18.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.18.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.18.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.18.ssm_dt.bias
F32
F32
[32]
blk.18.ssm_norm.weight
F32
F32
[128]
blk.18.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.19
blk.19.attn_k.weight
BF16
BF16
[2048, 512]
blk.19.attn_k_norm.weight
F32
F32
[256]
blk.19.attn_norm.weight
F32
F32
[2048]
blk.19.attn_output.weight
BF16
BF16
[4096, 2048]
blk.19.attn_q.weight
BF16
BF16
[2048, 8192]
blk.19.attn_q_norm.weight
F32
F32
[256]
blk.19.attn_v.weight
BF16
BF16
[2048, 512]
blk.19.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.19.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.19.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.19.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.19.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.19.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.19.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.19.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.19.post_attention_norm.weight
F32
F32
[2048]
blk.20
blk.20.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.20.attn_norm.weight
F32
F32
[2048]
blk.20.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.20.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.20.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.20.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.20.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.20.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.20.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.20.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.20.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.20.post_attention_norm.weight
F32
F32
[2048]
blk.20.ssm_a
F32
F32
[32]
blk.20.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.20.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.20.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.20.ssm_dt.bias
F32
F32
[32]
blk.20.ssm_norm.weight
F32
F32
[128]
blk.20.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.21
blk.21.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.21.attn_norm.weight
F32
F32
[2048]
blk.21.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.21.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.21.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.21.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.21.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.21.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.21.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.21.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.21.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.21.post_attention_norm.weight
F32
F32
[2048]
blk.21.ssm_a
F32
F32
[32]
blk.21.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.21.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.21.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.21.ssm_dt.bias
F32
F32
[32]
blk.21.ssm_norm.weight
F32
F32
[128]
blk.21.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.22
blk.22.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.22.attn_norm.weight
F32
F32
[2048]
blk.22.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.22.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.22.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.22.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.22.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.22.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.22.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.22.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.22.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.22.post_attention_norm.weight
F32
F32
[2048]
blk.22.ssm_a
F32
F32
[32]
blk.22.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.22.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.22.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.22.ssm_dt.bias
F32
F32
[32]
blk.22.ssm_norm.weight
F32
F32
[128]
blk.22.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.23
blk.23.attn_k.weight
BF16
BF16
[2048, 512]
blk.23.attn_k_norm.weight
F32
F32
[256]
blk.23.attn_norm.weight
F32
F32
[2048]
blk.23.attn_output.weight
BF16
BF16
[4096, 2048]
blk.23.attn_q.weight
BF16
BF16
[2048, 8192]
blk.23.attn_q_norm.weight
F32
F32
[256]
blk.23.attn_v.weight
BF16
BF16
[2048, 512]
blk.23.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.23.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.23.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.23.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.23.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.23.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.23.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.23.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.23.post_attention_norm.weight
F32
F32
[2048]
blk.24
blk.24.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.24.attn_norm.weight
F32
F32
[2048]
blk.24.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.24.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.24.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.24.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.24.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.24.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.24.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.24.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.24.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.24.post_attention_norm.weight
F32
F32
[2048]
blk.24.ssm_a
F32
F32
[32]
blk.24.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.24.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.24.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.24.ssm_dt.bias
F32
F32
[32]
blk.24.ssm_norm.weight
F32
F32
[128]
blk.24.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.25
blk.25.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.25.attn_norm.weight
F32
F32
[2048]
blk.25.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.25.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.25.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.25.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.25.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.25.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.25.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.25.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.25.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.25.post_attention_norm.weight
F32
F32
[2048]
blk.25.ssm_a
F32
F32
[32]
blk.25.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.25.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.25.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.25.ssm_dt.bias
F32
F32
[32]
blk.25.ssm_norm.weight
F32
F32
[128]
blk.25.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.26
blk.26.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.26.attn_norm.weight
F32
F32
[2048]
blk.26.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.26.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.26.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.26.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.26.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.26.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.26.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.26.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.26.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.26.post_attention_norm.weight
F32
F32
[2048]
blk.26.ssm_a
F32
F32
[32]
blk.26.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.26.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.26.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.26.ssm_dt.bias
F32
F32
[32]
blk.26.ssm_norm.weight
F32
F32
[128]
blk.26.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.27
blk.27.attn_k.weight
BF16
BF16
[2048, 512]
blk.27.attn_k_norm.weight
F32
F32
[256]
blk.27.attn_norm.weight
F32
F32
[2048]
blk.27.attn_output.weight
BF16
BF16
[4096, 2048]
blk.27.attn_q.weight
BF16
BF16
[2048, 8192]
blk.27.attn_q_norm.weight
F32
F32
[256]
blk.27.attn_v.weight
BF16
BF16
[2048, 512]
blk.27.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.27.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.27.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.27.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.27.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.27.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.27.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.27.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.27.post_attention_norm.weight
F32
F32
[2048]
blk.28
blk.28.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.28.attn_norm.weight
F32
F32
[2048]
blk.28.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.28.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.28.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.28.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.28.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.28.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.28.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.28.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.28.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.28.post_attention_norm.weight
F32
F32
[2048]
blk.28.ssm_a
F32
F32
[32]
blk.28.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.28.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.28.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.28.ssm_dt.bias
F32
F32
[32]
blk.28.ssm_norm.weight
F32
F32
[128]
blk.28.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.29
blk.29.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.29.attn_norm.weight
F32
F32
[2048]
blk.29.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.29.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.29.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.29.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.29.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.29.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.29.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.29.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.29.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.29.post_attention_norm.weight
F32
F32
[2048]
blk.29.ssm_a
F32
F32
[32]
blk.29.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.29.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.29.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.29.ssm_dt.bias
F32
F32
[32]
blk.29.ssm_norm.weight
F32
F32
[128]
blk.29.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.30
blk.30.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.30.attn_norm.weight
F32
F32
[2048]
blk.30.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.30.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.30.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.30.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.30.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.30.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.30.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.30.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.30.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.30.post_attention_norm.weight
F32
F32
[2048]
blk.30.ssm_a
F32
F32
[32]
blk.30.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.30.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.30.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.30.ssm_dt.bias
F32
F32
[32]
blk.30.ssm_norm.weight
F32
F32
[128]
blk.30.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.31
blk.31.attn_k.weight
BF16
BF16
[2048, 512]
blk.31.attn_k_norm.weight
F32
F32
[256]
blk.31.attn_norm.weight
F32
F32
[2048]
blk.31.attn_output.weight
BF16
BF16
[4096, 2048]
blk.31.attn_q.weight
BF16
BF16
[2048, 8192]
blk.31.attn_q_norm.weight
F32
F32
[256]
blk.31.attn_v.weight
BF16
BF16
[2048, 512]
blk.31.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.31.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.31.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.31.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.31.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.31.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.31.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.31.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.31.post_attention_norm.weight
F32
F32
[2048]
blk.32
blk.32.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.32.attn_norm.weight
F32
F32
[2048]
blk.32.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.32.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.32.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.32.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.32.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.32.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.32.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.32.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.32.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.32.post_attention_norm.weight
F32
F32
[2048]
blk.32.ssm_a
F32
F32
[32]
blk.32.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.32.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.32.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.32.ssm_dt.bias
F32
F32
[32]
blk.32.ssm_norm.weight
F32
F32
[128]
blk.32.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.33
blk.33.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.33.attn_norm.weight
F32
F32
[2048]
blk.33.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.33.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.33.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.33.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.33.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.33.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.33.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.33.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.33.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.33.post_attention_norm.weight
F32
F32
[2048]
blk.33.ssm_a
F32
F32
[32]
blk.33.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.33.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.33.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.33.ssm_dt.bias
F32
F32
[32]
blk.33.ssm_norm.weight
F32
F32
[128]
blk.33.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.34
blk.34.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.34.attn_norm.weight
F32
F32
[2048]
blk.34.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.34.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.34.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.34.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.34.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.34.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.34.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.34.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.34.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.34.post_attention_norm.weight
F32
F32
[2048]
blk.34.ssm_a
F32
F32
[32]
blk.34.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.34.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.34.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.34.ssm_dt.bias
F32
F32
[32]
blk.34.ssm_norm.weight
F32
F32
[128]
blk.34.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.35
blk.35.attn_k.weight
BF16
BF16
[2048, 512]
blk.35.attn_k_norm.weight
F32
F32
[256]
blk.35.attn_norm.weight
F32
F32
[2048]
blk.35.attn_output.weight
BF16
BF16
[4096, 2048]
blk.35.attn_q.weight
BF16
BF16
[2048, 8192]
blk.35.attn_q_norm.weight
F32
F32
[256]
blk.35.attn_v.weight
BF16
BF16
[2048, 512]
blk.35.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.35.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.35.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.35.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.35.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.35.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.35.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.35.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.35.post_attention_norm.weight
F32
F32
[2048]
blk.36
blk.36.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.36.attn_norm.weight
F32
F32
[2048]
blk.36.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.36.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.36.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.36.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.36.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.36.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.36.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.36.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.36.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.36.post_attention_norm.weight
F32
F32
[2048]
blk.36.ssm_a
F32
F32
[32]
blk.36.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.36.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.36.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.36.ssm_dt.bias
F32
F32
[32]
blk.36.ssm_norm.weight
F32
F32
[128]
blk.36.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.37
blk.37.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.37.attn_norm.weight
F32
F32
[2048]
blk.37.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.37.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.37.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.37.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.37.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.37.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.37.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.37.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.37.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.37.post_attention_norm.weight
F32
F32
[2048]
blk.37.ssm_a
F32
F32
[32]
blk.37.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.37.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.37.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.37.ssm_dt.bias
F32
F32
[32]
blk.37.ssm_norm.weight
F32
F32
[128]
blk.37.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.38
blk.38.attn_gate.weight
BF16
BF16
[2048, 4096]
blk.38.attn_norm.weight
F32
F32
[2048]
blk.38.attn_qkv.weight
BF16
BF16
[2048, 8192]
blk.38.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.38.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.38.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.38.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.38.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.38.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.38.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.38.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.38.post_attention_norm.weight
F32
F32
[2048]
blk.38.ssm_a
F32
F32
[32]
blk.38.ssm_alpha.weight
BF16
BF16
[2048, 32]
blk.38.ssm_beta.weight
BF16
BF16
[2048, 32]
blk.38.ssm_conv1d.weight
F32
F32
[4, 8192]
blk.38.ssm_dt.bias
F32
F32
[32]
blk.38.ssm_norm.weight
F32
F32
[128]
blk.38.ssm_out.weight
BF16
BF16
[4096, 2048]
blk.39
blk.39.attn_k.weight
BF16
BF16
[2048, 512]
blk.39.attn_k_norm.weight
F32
F32
[256]
blk.39.attn_norm.weight
F32
F32
[2048]
blk.39.attn_output.weight
BF16
BF16
[4096, 2048]
blk.39.attn_q.weight
BF16
BF16
[2048, 8192]
blk.39.attn_q_norm.weight
F32
F32
[256]
blk.39.attn_v.weight
BF16
BF16
[2048, 512]
blk.39.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.39.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.39.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.39.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.39.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.39.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.39.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.39.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.39.post_attention_norm.weight
F32
F32
[2048]
blk.40
blk.40.attn_k.weight
BF16
BF16
[2048, 512]
blk.40.attn_k_norm.weight
F32
F32
[256]
blk.40.attn_norm.weight
F32
F32
[2048]
blk.40.attn_output.weight
BF16
BF16
[4096, 2048]
blk.40.attn_q.weight
BF16
BF16
[2048, 8192]
blk.40.attn_q_norm.weight
F32
F32
[256]
blk.40.attn_v.weight
BF16
BF16
[2048, 512]
blk.40.ffn_down_exps.weight
BF16
BF16
[512, 2048, 256]
blk.40.ffn_down_shexp.weight
BF16
BF16
[512, 2048]
blk.40.ffn_gate_exps.weight
BF16
BF16
[2048, 512, 256]
blk.40.ffn_gate_inp.weight
F32
F32
[2048, 256]
blk.40.ffn_gate_inp_shexp.weight
F16
F16
[2048]
blk.40.ffn_gate_shexp.weight
BF16
BF16
[2048, 512]
blk.40.ffn_up_exps.weight
BF16
BF16
[2048, 512, 256]
blk.40.ffn_up_shexp.weight
BF16
BF16
[2048, 512]
blk.40.nextn.eh_proj.weight
BF16
BF16
[4096, 2048]
blk.40.nextn.enorm.weight
F32
F32
[2048]
blk.40.nextn.hnorm.weight
F32
F32
[2048]
blk.40.nextn.shared_head_norm.weight
F32
F32
[2048]
blk.40.post_attention_norm.weight
F32
F32
[2048]
output.weight
BF16
BF16
[2048, 248320]
output_norm.weight
F32
F32
[2048]