{
  "vocab_size": 152064,
  "pad_id": 151645,
  "eos_id": -1,
  "dim": 256,
  "n_layers": 18,
  "n_heads": 12,
  "n_kv_heads": 6,
  "use_kan": true,
  "train_softmax_temp": true,
  "use_softmax_temp_proj": true,
  "softmax_bias": false,
  "multiple_of": 256,
  "ffn_dim_multiplier": null,
  "rms_norm_eps": 1e-05,
  "rope_theta": 500000,
  "use_scaled_rope": false,
  "max_batch_size": 100,
  "max_seq_len": 128,
  "num_experts": 14,
  "num_experts_per_tok": 4,
  "model_type": "KANaMoEv1"
}