TokenFormer-150M / config.json
Haiyang-W's picture
Upload config.json
a460767 verified
raw
history blame
875 Bytes
{
"architectures": [
"TokenFormerForCausalLM"
],
"num_layers": 12,
"hidden_size": 768,
"num_attention_heads": 12,
"qkv_slot_num": 768,
"proj_slot_num": 768,
"ffn_slot_num": 3072,
"seq_length": 2048,
"max_position_embeddings": 2048,
"pos_emb": "rotary",
"rotary_pct": 0.25,
"no_weight_tying": false,
"norm": "layernorm_nonparam",
"final_norm": "layernorm",
"gpt_j_residual": false,
"output_layer_parallelism": "column",
"use_bias_in_attn_linear": false,
"attention_config": [[["tokenformer"], 12]],
"norm_activation_type": "l2_norm_gelu",
"scaled_upper_triang_masked_softmax_fusion": false,
"bias_gelu_fusion": false,
"rope_fusion": false,
"layernorm_fusion": false,
"init_method": "normal",
"output_layer_init_method": "wang_init",
"use_cache": true,
"torch_dtype": "float16",
"transformers_version": "4.36.0"
}