HuYaLM-100B-fp16 / config.json
BlackSamorez's picture
initial implementation
22d23ba
raw
history blame
871 Bytes
{
  "architectures": [
    "YalmForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_yalm.YalmConfig",
    "AutoModel": "modelling_yalm.YalmModel",
    "AutoModelForSequenceClassification": "modelling_yalm.RWForSequenceClassification",
    "AutoModelForCausalLM": "modelling_yalm.YalmForCausalLM"
  },
  "padded_vocab_size": 128000,
  "embedding_size": 2048,
  "hidden_size": 10240,
  "intermediate_size": 27308,
  "num_layers": 80,
  "num_attention_heads": 128,
  "scale_attn_by_inverse_layer_idx": true,
  "activation_type": "geglu",
  "model_type": "YaLM",
  "max_position_embeddings": 1024,
  "apply_residual_connection_post_layernorm": false,
  "initializer_range": 0.02,
  "layernorm_epsilon": 1e-5,
  "torch_dtype": "float16",
  "transformers_version": "4.32.1"
}