File size: 2,259 Bytes
5ef2b59
962cfc4
5ef2b59
 
962cfc4
5ef2b59
 
 
962cfc4
 
 
 
 
 
 
 
5ef2b59
962cfc4
 
 
 
 
 
 
5ef2b59
962cfc4
5ef2b59
962cfc4
 
 
 
 
 
 
 
 
 
 
5ef2b59
962cfc4
 
 
 
5ef2b59
 
962cfc4
 
 
5ef2b59
 
962cfc4
 
 
5ef2b59
962cfc4
 
 
76477f6
5ef2b59
962cfc4
 
 
 
 
 
 
 
 
 
 
 
5ef2b59
 
 
962cfc4
 
 
 
 
 
5ef2b59
962cfc4
 
 
 
5ef2b59
962cfc4
 
5ef2b59
962cfc4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
{
  "_attn_implementation_internal": "eager",
  "_name_or_path": "jetmoe/jetmoe-8b-chat",
  "activation_function": "silu",
  "add_cross_attention": false,
  "architectures": [
    "JetMoEForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_jetmoe.JetMoEConfig",
    "AutoModelForCausalLM": "modeling_jetmoe.JetMoEForCausalLM"
  },
  "aux_loss_coef": 0.01,
  "bad_words_ids": null,
  "begin_suppress_tokens": null,
  "bias": true,
  "bos_token_id": 1,
  "chunk_size_feed_forward": 0,
  "cross_attention_hidden_size": null,
  "decoder_start_token_id": null,
  "diversity_penalty": 0.0,
  "do_sample": false,
  "early_stopping": false,
  "encoder_no_repeat_ngram_size": 0,
  "eos_token_id": 2,
  "exponential_decay_length_penalty": null,
  "ffn_hidden_size": 5632,
  "finetuning_task": null,
  "forced_bos_token_id": null,
  "forced_eos_token_id": null,
  "glu": true,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.01,
  "is_decoder": false,
  "is_encoder_decoder": false,
  "kv_channels": 128,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_epsilon": 1e-05,
  "length_penalty": 1.0,
  "max_length": 20,
  "min_length": 0,
  "model_type": "jetmoe",
  "moe_num_experts": 8,
  "moe_top_k": 2,
  "n_embd": 2048,
  "n_head": 16,
  "n_layer": 24,
  "n_positions": 4096,
  "no_repeat_ngram_size": 0,
  "num_beam_groups": 1,
  "num_beams": 1,
  "num_key_value_heads": 16,
  "num_layers": 24,
  "num_return_sequences": 1,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_scores": false,
  "pad_token_id": null,
  "prefix": null,
  "problem_type": null,
  "pruned_heads": {},
  "remove_invalid_values": false,
  "repetition_penalty": 1.0,
  "return_dict": true,
  "return_dict_in_generate": false,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "rotary_percent": 1.0,
  "sep_token_id": null,
  "suppress_tokens": null,
  "task_specific_params": null,
  "temperature": 1.0,
  "tf_legacy_loss": false,
  "tie_encoder_decoder": false,
  "tie_word_embeddings": true,
  "tokenizer_class": null,
  "top_k": 50,
  "top_p": 1.0,
  "torchscript": false,
  "transformers_version": null,
  "typical_p": 1.0,
  "use_bfloat16": false,
  "use_cache": true,
  "vocab_size": 32000
}