File size: 2,259 Bytes
5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 76477f6 5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 5ef2b59 962cfc4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
{
"_attn_implementation_internal": "eager",
"_name_or_path": "jetmoe/jetmoe-8b-chat",
"activation_function": "silu",
"add_cross_attention": false,
"architectures": [
"JetMoEForCausalLM"
],
"auto_map": {
"AutoConfig": "configuration_jetmoe.JetMoEConfig",
"AutoModelForCausalLM": "modeling_jetmoe.JetMoEForCausalLM"
},
"aux_loss_coef": 0.01,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bias": true,
"bos_token_id": 1,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": 2,
"exponential_decay_length_penalty": null,
"ffn_hidden_size": 5632,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"glu": true,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"initializer_range": 0.01,
"is_decoder": false,
"is_encoder_decoder": false,
"kv_channels": 128,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"layer_norm_epsilon": 1e-05,
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "jetmoe",
"moe_num_experts": 8,
"moe_top_k": 2,
"n_embd": 2048,
"n_head": 16,
"n_layer": 24,
"n_positions": 4096,
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_key_value_heads": 16,
"num_layers": 24,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"rms_norm_eps": 1e-05,
"rope_theta": 10000.0,
"rotary_percent": 1.0,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"transformers_version": null,
"typical_p": 1.0,
"use_bfloat16": false,
"use_cache": true,
"vocab_size": 32000
}