Haiyang-W committed on
Commit a460767
1 Parent(s): d3c74fb

Upload config.json

Files changed (1)
config.json +32 -0
config.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "architectures": [
+     "TokenFormerForCausalLM"
+   ],
+   "num_layers": 12,
+   "hidden_size": 768,
+   "num_attention_heads": 12,
+   "qkv_slot_num": 768,
+   "proj_slot_num": 768,
+   "ffn_slot_num": 3072,
+   "seq_length": 2048,
+   "max_position_embeddings": 2048,
+   "pos_emb": "rotary",
+   "rotary_pct": 0.25,
+   "no_weight_tying": false,
+   "norm": "layernorm_nonparam",
+   "final_norm": "layernorm",
+   "gpt_j_residual": false,
+   "output_layer_parallelism": "column",
+   "use_bias_in_attn_linear": false,
+   "attention_config": [[["tokenformer"], 12]],
+   "norm_activation_type": "l2_norm_gelu",
+   "scaled_upper_triang_masked_softmax_fusion": false,
+   "bias_gelu_fusion": false,
+   "rope_fusion": false,
+   "layernorm_fusion": false,
+   "init_method": "normal",
+   "output_layer_init_method": "wang_init",
+   "use_cache": true,
+   "torch_dtype": "float16",
+   "transformers_version": "4.36.0"
+ }
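
For reference, the uploaded configuration can be inspected with a few lines of plain Python once the file is available locally. The sketch below is illustrative and not part of the commit: it assumes the file has been downloaded and saved as config.json in the working directory, and the printed fields are simply a subset of the values shown in the diff above.

import json

# Minimal sketch: load a local copy of the uploaded TokenFormer config.
# Assumes config.json has been downloaded to the current directory
# (the path is an assumption, not part of this commit).
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# A few fields that define the model shape, as listed in the diff above.
print(config["architectures"])   # ['TokenFormerForCausalLM']
print(config["num_layers"])      # 12
print(config["hidden_size"])     # 768
print(config["ffn_slot_num"])    # 3072
print(config["torch_dtype"])     # float16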