{
    "architectures": [
        "RWForCausalLM"
    ],
    "attention_dropout": 0.0,
    "auto_map": {
        "AutoConfig": "configuration_yalm.YalmConfig",
        "AutoModel": "modelling_yalm.YalmModel",
        "AutoModelForSequenceClassification": "modelling_yalm.RWForSequenceClassification",
        "AutoModelForCausalLM": "modelling_yalm.YalmForCausalLM"
    },
    "padded_vocab_size": 128000,
    "embedding_size": 2048,
    "hidden_size": 10240,
    "intermediate_size": 27308,
    "num_layers": 80,
    "num_attention_heads": 128,
    "scale_attn_by_inverse_layer_idx": true,
    "activation_type": "geglu",
    "model_type": "YaLM",
    "max_position_embeddings": 1024,
    "apply_residual_connection_post_layernorm": false,
    "initializer_range": 0.02,
    "layernorm_epsilon": 1e-5,
    "torch_dtype": "float16",
    "transformers_version": "4.32.1"
}
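
Since the auto_map entries point at custom classes (configuration_yalm.YalmConfig, modelling_yalm.YalmForCausalLM), loading this config through transformers requires trust_remote_code=True. A minimal sketch of how such a checkpoint would typically be loaded, assuming the custom configuration_yalm.py / modelling_yalm.py files ship alongside the config; the local path "./yalm-checkpoint" is a hypothetical placeholder:

    # Hypothetical loading sketch, not part of the config file itself.
    import torch
    from transformers import AutoConfig, AutoModelForCausalLM

    config = AutoConfig.from_pretrained("./yalm-checkpoint", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        "./yalm-checkpoint",
        config=config,
        torch_dtype=torch.float16,  # matches "torch_dtype": "float16" above
        trust_remote_code=True,     # needed so the auto_map custom classes are resolved
    )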