diff --git a/.gitattributes b/.gitattributes index c5689448178ec7cc369fe4c6e34d87b514bc9226..5f8a17ce7d0a5b8b74454a52f1afbb70bc560914 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2809,3 +2809,17 @@ neuronxcc-2.13.66.0+6dfecc895/MODULE_b6f59521083a4c513238+2c2d707e/model.neff fi neuronxcc-2.13.66.0+6dfecc895/MODULE_d755a6931d0c1f489fa9+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.13.66.0+6dfecc895/MODULE_d7606d9adfeb9cb0fd73+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.13.66.0+6dfecc895/MODULE_dfb85ebd0d1b542fb616+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_07830cb62c4a0ba392e1+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_2624e80c4e53734fa89f+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_354db23f0144f74c570e+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_35ee985c28310b8a296e+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_49ced6c57ff86101015e+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_5310c423645d56c069a1+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_56710e033c6feca053ca+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_747d55f073445352daf0+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_7653a694a1679de6e78f+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_b8581e11932985674beb+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_cb4f665c1e3342f5d241+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_d366121804055516f63f+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_da34f50cf6e3a3fbbde0+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.13.66.0+6dfecc895/MODULE_df19689e1c1380996cd5+2c2d707e/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4b3ca94bb4445bc28bc8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4b3ca94bb4445bc28bc8.json new file mode 100644 index 0000000000000000000000000000000000000000..66c56b2cfcaf36f773bbd1f20605e3b20b20191c --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4b3ca94bb4445bc28bc8.json @@ -0,0 +1 @@ +{"apply_residual_connection_post_layernorm": false, "architectures": ["BloomForCausalLM"], "attention_dropout": 0.1, "bos_token_id": 1, "dtype": "float32", "eos_token_id": 2, "gradient_checkpointing": false, "hidden_dropout": 0.1, "hidden_size": 32, "id2label": {"0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2"}, "initializer_range": 0.02, "is_decoder": true, "label2id": {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}, "layer_norm_epsilon": 1e-05, "model_type": "bloom", "n_head": 4, "n_layer": 5, "n_positions": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM", "checkpoint_revision": "92b07e9b7b4f986fa7c54e2ac3b9201b4ba5212e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "pad_token_id": 3, "pretraining_tp": 1, "seq_length": 7, "slow_but_exact": true, "torch_dtype": "float32", "type_vocab_size": 16, "use_cache": true, "vocab_size": 1024} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4eccd2e66477ff5ef5b3.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4eccd2e66477ff5ef5b3.json new file mode 100644 index 0000000000000000000000000000000000000000..8084d8fd84a3266614d40c01a9ece1039099f4e4 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4eccd2e66477ff5ef5b3.json @@ -0,0 +1 @@ +{"vocab_size": 1024, "hidden_size": 32, "n_layer": 5, "n_head": 4, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "use_cache": true, "pretraining_tp": 1, "apply_residual_connection_post_layernorm": false, "hidden_dropout": 0.1, "attention_dropout": 0.1, "bos_token_id": 1, "eos_token_id": 2, "slow_but_exact": true, "torch_dtype": "float32", "is_decoder": true, "architectures": ["BloomForCausalLM"], "id2label": {"0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}, "pad_token_id": 3, "dtype": "float32", "gradient_checkpointing": false, "model_type": "bloom", "n_positions": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM", "checkpoint_revision": "0f4f06f162cd67d34d03ee156484e4001d468500", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "seq_length": 7, "type_vocab_size": 16} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/d635309efcd921a3a3f6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/d635309efcd921a3a3f6.json new file mode 100644 index 0000000000000000000000000000000000000000..8ca9cbe92d213fbceb03b98e6437d5d890dfd179 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/d635309efcd921a3a3f6.json @@ -0,0 +1 @@ +{"vocab_size": 1024, "hidden_size": 32, "n_layer": 5, "n_head": 4, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "use_cache": true, "pretraining_tp": 1, "apply_residual_connection_post_layernorm": false, "hidden_dropout": 0.1, "attention_dropout": 0.1, "bos_token_id": 1, "eos_token_id": 2, "slow_but_exact": true, "torch_dtype": "float32", "is_decoder": true, "architectures": ["BloomForCausalLM"], "id2label": {"0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}, "pad_token_id": 3, "dtype": "float32", "gradient_checkpointing": false, "model_type": "bloom", "n_positions": 512, "seq_length": 7, "type_vocab_size": 16, "neuron": {"task": "text-generation", "batch_size": 2, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 100, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM", "checkpoint_revision": "0f4f06f162cd67d34d03ee156484e4001d468500"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/2c3a47e6fca2fd6ac3cf.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/2c3a47e6fca2fd6ac3cf.json new file mode 100644 index 0000000000000000000000000000000000000000..d00f28594ee93c6094e47cd7dde67a3ca1e7e76c --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/2c3a47e6fca2fd6ac3cf.json @@ -0,0 +1 @@ +{"vocab_size": 50257, "n_positions": 1024, "n_embd": 768, "n_layer": 12, "n_head": 12, "n_inner": null, "activation_function": "gelu_new", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "architectures": ["GPT2LMHeadModel"], "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "model_type": "gpt2", "n_ctx": 1024, "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 1024, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/8ae9e463111b0fa38673.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/8ae9e463111b0fa38673.json new file mode 100644 index 0000000000000000000000000000000000000000..aeb86022d512f4d581757070095eedf687e630ae --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/8ae9e463111b0fa38673.json @@ -0,0 +1 @@ +{"vocab_size": 50257, "n_positions": 1024, "n_embd": 768, "n_layer": 12, "n_head": 12, "n_inner": null, "activation_function": "gelu_new", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "architectures": ["GPT2LMHeadModel"], "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "model_type": "gpt2", "n_ctx": 1024, "neuron": {"auto_cast_type": "fp32", "batch_size": 4, "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/165740fba2c72c56c43d.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/165740fba2c72c56c43d.json new file mode 100644 index 0000000000000000000000000000000000000000..5e198e5ad9dd636c282f26327971c0da96467d6c --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/165740fba2c72c56c43d.json @@ -0,0 +1 @@ +{"activation_function": "gelu_new", "attention_probs_dropout_prob": 0.1, "attn_pdrop": 0.1, "bos_token_id": 98, "embd_pdrop": 0.1, "eos_token_id": 98, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 512, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "n_positions": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-gpt2", "checkpoint_revision": "91c0fe31d692dd8448d9bc06e8d1877345009e3b", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "pad_token_id": 98, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "type_vocab_size": 16, "use_cache": true, "vocab_size": 1000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2037d1be0146ceb8d639.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2037d1be0146ceb8d639.json new file mode 100644 index 0000000000000000000000000000000000000000..96736ee9d5a0a60d4c3e6fd10c8e174a962276cd --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2037d1be0146ceb8d639.json @@ -0,0 +1 @@ +{"vocab_size": 1000, "n_positions": 512, "n_embd": 32, "n_layer": 5, "n_head": 4, "n_inner": null, "activation_function": "gelu_new", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 98, "eos_token_id": 98, "pad_token_id": 98, "attention_probs_dropout_prob": 0.1, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "intermediate_size": 37, "model_type": "gpt2", "n_ctx": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-gpt2", "checkpoint_revision": "91c0fe31d692dd8448d9bc06e8d1877345009e3b", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "type_vocab_size": 16} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/3edc0caf8d3805a471bd.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/3edc0caf8d3805a471bd.json new file mode 100644 index 0000000000000000000000000000000000000000..0a07d5d31fea8c90b4bffa306f569be8c76e9815 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/3edc0caf8d3805a471bd.json @@ -0,0 +1 @@ +{"vocab_size": 1000, "n_positions": 512, "n_embd": 32, "n_layer": 5, "n_head": 4, "n_inner": null, "activation_function": "gelu_new", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 98, "eos_token_id": 98, "pad_token_id": 98, "attention_probs_dropout_prob": 0.1, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "intermediate_size": 37, "model_type": "gpt2", "n_ctx": 512, "type_vocab_size": 16, "neuron": {"task": "text-generation", "batch_size": 2, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 100, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "hf-internal-testing/tiny-random-gpt2", "checkpoint_revision": "91c0fe31d692dd8448d9bc06e8d1877345009e3b"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/6c7cfec071642e5fecee.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/6c7cfec071642e5fecee.json new file mode 100644 index 0000000000000000000000000000000000000000..f70d6448e4174a87e8fe06e5651326e5945ba0a9 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/6c7cfec071642e5fecee.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "NousResearch/Llama-2-7b-chat-hf", "checkpoint_revision": "37892f30c23786c0d5367d80481fa0d9fba93cf8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/b9904e0c46d338b623d2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/b9904e0c46d338b623d2.json new file mode 100644 index 0000000000000000000000000000000000000000..d3fc80df684544689facdf4b43a119a8ad22dbd8 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/b9904e0c46d338b623d2.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "NousResearch/Llama-2-7b-chat-hf", "checkpoint_revision": "37892f30c23786c0d5367d80481fa0d9fba93cf8"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/62a76db84304b34ae305.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/62a76db84304b34ae305.json new file mode 100644 index 0000000000000000000000000000000000000000..c0fc1f377c0510cea111ef0d6abd2ea98a208db0 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/62a76db84304b34ae305.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 128, "initializer_range": 0.02, "intermediate_size": 256, "max_position_embeddings": 512, "model_type": "llama", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-llama", "checkpoint_revision": "7fdafd2fe6a2d31c6abb72ae60db606d8bb23196", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 1, "num_hidden_layers": 1, "num_key_value_heads": 1, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float32", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/7c595f241fa311d7b227.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/7c595f241fa311d7b227.json new file mode 100644 index 0000000000000000000000000000000000000000..001490e86d95d953f24ecda29caf70f794c31afd --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/7c595f241fa311d7b227.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 512, "hidden_size": 128, "intermediate_size": 256, "num_hidden_layers": 1, "num_attention_heads": 1, "num_key_value_heads": 1, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-06, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-llama", "checkpoint_revision": "7fdafd2fe6a2d31c6abb72ae60db606d8bb23196", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/b650b9d41c4386d36a89.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/b650b9d41c4386d36a89.json new file mode 100644 index 0000000000000000000000000000000000000000..218e70b904c546a08c119ec0df60d23a1abaef70 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/b650b9d41c4386d36a89.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 512, "hidden_size": 128, "intermediate_size": 256, "num_hidden_layers": 1, "num_attention_heads": 1, "num_key_value_heads": 1, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-06, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 2, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 100, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "dacorvo/tiny-random-llama", "checkpoint_revision": "7fdafd2fe6a2d31c6abb72ae60db606d8bb23196"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/04a286f25641fb37c0da.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/04a286f25641fb37c0da.json new file mode 100644 index 0000000000000000000000000000000000000000..a4ed3a44f9e6c5dd4d80a3c383a64527a67da866 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/04a286f25641fb37c0da.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/09bd40b23d9644abeb4e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/09bd40b23d9644abeb4e.json new file mode 100644 index 0000000000000000000000000000000000000000..f4a3e636ebb485e5416982cbd766281f44a12239 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/09bd40b23d9644abeb4e.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/39695bb575ebdb0bdfc1.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/39695bb575ebdb0bdfc1.json new file mode 100644 index 0000000000000000000000000000000000000000..c2ecf306ba79ebfc9336b4c9ab5e7f6ba2859028 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/39695bb575ebdb0bdfc1.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3a328b55c6445b520b8e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3a328b55c6445b520b8e.json new file mode 100644 index 0000000000000000000000000000000000000000..3a08bed7d6cd89dd5326bdb6083188a853b02f92 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3a328b55c6445b520b8e.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 16, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3cad45de92cf81a0a16f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3cad45de92cf81a0a16f.json new file mode 100644 index 0000000000000000000000000000000000000000..3557a954d8cc2b20228f4bfb10d6e73dbe86958b --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3cad45de92cf81a0a16f.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3fa0092e4157c2ddb47b.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3fa0092e4157c2ddb47b.json new file mode 100644 index 0000000000000000000000000000000000000000..e53c6c8ec5cb0f5d23fc11da0559a8e8aa10f2e1 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3fa0092e4157c2ddb47b.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/44c81784d74cd4713969.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/44c81784d74cd4713969.json new file mode 100644 index 0000000000000000000000000000000000000000..71e5e7b171bdb6aeffb73728941269f232025d3c --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/44c81784d74cd4713969.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 8, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/5424b76f5346f48c87dc.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/5424b76f5346f48c87dc.json new file mode 100644 index 0000000000000000000000000000000000000000..586da8ab48a3ec23ce2229874e81d333ebd0345d --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/5424b76f5346f48c87dc.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/58eec5854a9f564f8a27.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/58eec5854a9f564f8a27.json new file mode 100644 index 0000000000000000000000000000000000000000..e17d1b828cd92e19d778a0a6c2baf32e56a2404f --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/58eec5854a9f564f8a27.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/6a314b3d69ca329d2711.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/6a314b3d69ca329d2711.json new file mode 100644 index 0000000000000000000000000000000000000000..571644cf77882c1373478943036f95cf6f3f7703 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/6a314b3d69ca329d2711.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 1, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7a0a7caf91982c3c67b0.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7a0a7caf91982c3c67b0.json new file mode 100644 index 0000000000000000000000000000000000000000..b6c61842bfd7ebe476f18609a83e8f3c1e155e51 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7a0a7caf91982c3c67b0.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7e2416095368bdd0a5cd.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7e2416095368bdd0a5cd.json new file mode 100644 index 0000000000000000000000000000000000000000..45033df9d6480802370abba113d4732d1bfac4bc --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7e2416095368bdd0a5cd.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/ad1f7e0ca3ab366f91d4.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/ad1f7e0ca3ab366f91d4.json new file mode 100644 index 0000000000000000000000000000000000000000..126ab469041bd4993b166a55583245e30f26607f --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/ad1f7e0ca3ab366f91d4.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d49c97f13a4593173a49.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d49c97f13a4593173a49.json new file mode 100644 index 0000000000000000000000000000000000000000..8e57a50375831667c86b3ae4590da021d4eacd98 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d49c97f13a4593173a49.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/dd0ae8e9d982a2ed293b.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/dd0ae8e9d982a2ed293b.json new file mode 100644 index 0000000000000000000000000000000000000000..4f622db34f02fd43d560b8a82ded67831a9605a5 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/dd0ae8e9d982a2ed293b.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/efa4119dabd9c1f8277f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/efa4119dabd9c1f8277f.json new file mode 100644 index 0000000000000000000000000000000000000000..f2664645ffbe6894a48eb6b9f3ef377c06ae6d14 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/efa4119dabd9c1f8277f.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/059827c299e8d9043f57.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/059827c299e8d9043f57.json new file mode 100644 index 0000000000000000000000000000000000000000..362d8a88eedcb88c31757b3416e39ebd8a80d0b9 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/059827c299e8d9043f57.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 2048, "intermediate_size": 5504, "num_hidden_layers": 24, "num_attention_heads": 16, "num_key_value_heads": 16, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp32", "batch_size": 4, "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/ece87a51a12bdc2169c6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/ece87a51a12bdc2169c6.json new file mode 100644 index 0000000000000000000000000000000000000000..3ec01cc447a550c55b9ff0fd53391fdf6d47b7d2 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/ece87a51a12bdc2169c6.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 2048, "intermediate_size": 5504, "num_hidden_layers": 24, "num_attention_heads": 16, "num_key_value_heads": 16, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 1024, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/1cfca4036d7b607639ea.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/1cfca4036d7b607639ea.json new file mode 100644 index 0000000000000000000000000000000000000000..31af98819e3fe66d4a30ddc20d3b5550febb6388 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/1cfca4036d7b607639ea.json @@ -0,0 +1 @@ +{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "attention_probs_dropout_prob": 0.1, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 32, "initializer_range": 0.02, "intermediate_size": 37, "is_decoder": true, "max_position_embeddings": 512, "model_type": "mistral", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM", "checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 4, "num_hidden_layers": 2, "num_key_value_heads": 2, "pad_token_id": 0, "rms_norm_eps": 1e-06, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "float32", "type_vocab_size": 16, "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/43c8baaa3d89767fe7f2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/43c8baaa3d89767fe7f2.json new file mode 100644 index 0000000000000000000000000000000000000000..c2228d7f2012b5f7d87e3ae0e83baace811771bc --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/43c8baaa3d89767fe7f2.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 512, "hidden_size": 32, "intermediate_size": 37, "num_hidden_layers": 2, "num_attention_heads": 4, "sliding_window": 4096, "num_key_value_heads": 2, "hidden_act": "gelu", "initializer_range": 0.02, "rms_norm_eps": 1e-06, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "is_decoder": true, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "attention_probs_dropout_prob": 0.1, "hidden_dropout_prob": 0.1, "model_type": "mistral", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM", "checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "type_vocab_size": 16} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/f9b607c62a5393e0b42f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/f9b607c62a5393e0b42f.json new file mode 100644 index 0000000000000000000000000000000000000000..e10ee435cc28dbf6076c9f13ed2d75402909dc08 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/f9b607c62a5393e0b42f.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 512, "hidden_size": 32, "intermediate_size": 37, "num_hidden_layers": 2, "num_attention_heads": 4, "sliding_window": 4096, "num_key_value_heads": 2, "hidden_act": "gelu", "initializer_range": 0.02, "rms_norm_eps": 1e-06, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "is_decoder": true, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "attention_probs_dropout_prob": 0.1, "hidden_dropout_prob": 0.1, "model_type": "mistral", "type_vocab_size": 16, "neuron": {"task": "text-generation", "batch_size": 2, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 100, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM", "checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/7a99245071db1d4e48e6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/7a99245071db1d4e48e6.json new file mode 100644 index 0000000000000000000000000000000000000000..e6549f2f7ecc2fc757e69148ddbe1c11b12160ca --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/7a99245071db1d4e48e6.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.1", "checkpoint_revision": "73068f3702d050a2fd5aa2ca1e612e5036429398", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/b0a5a2e1e8dd02dbe923.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/b0a5a2e1e8dd02dbe923.json new file mode 100644 index 0000000000000000000000000000000000000000..f5d3e1177bc37a296eef3b01431dd4c272b98594 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/b0a5a2e1e8dd02dbe923.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 1, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.1", "checkpoint_revision": "73068f3702d050a2fd5aa2ca1e612e5036429398"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1c4540cc29dcb3b99ff8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1c4540cc29dcb3b99ff8.json new file mode 100644 index 0000000000000000000000000000000000000000..861374c248966264a833725e14dccf2ff365aefc --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1c4540cc29dcb3b99ff8.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/2d6c1d9513debbcc5f7f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/2d6c1d9513debbcc5f7f.json new file mode 100644 index 0000000000000000000000000000000000000000..8717e5ea05f45343e51d9d9127f9146fd889c85b --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/2d6c1d9513debbcc5f7f.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 1, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/41284a607429208347c0.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/41284a607429208347c0.json new file mode 100644 index 0000000000000000000000000000000000000000..9ef47daf5eb58f0f367916934f5ba30107401987 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/41284a607429208347c0.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 16, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4620b76c4f39e71a8459.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4620b76c4f39e71a8459.json new file mode 100644 index 0000000000000000000000000000000000000000..6fcf360e41b9981b9e39cbadd1b8f2610ad98d18 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4620b76c4f39e71a8459.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 1, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4fcf5530fe99f6f4f3e6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4fcf5530fe99f6f4f3e6.json new file mode 100644 index 0000000000000000000000000000000000000000..47d833102e13b8f6dec92c72f8ac075e2163bc03 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4fcf5530fe99f6f4f3e6.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 8, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/5c9bfea69bb260a59b0c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/5c9bfea69bb260a59b0c.json new file mode 100644 index 0000000000000000000000000000000000000000..e13275b621f32a6ae29f76bff50250ed818abe09 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/5c9bfea69bb260a59b0c.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 16, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/698b844f07e07829c78e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/698b844f07e07829c78e.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3d4e16d7eb1d8a5d460741c701dc0f9683e65f --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/698b844f07e07829c78e.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a2a8d3c93367a33b250c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a2a8d3c93367a33b250c.json new file mode 100644 index 0000000000000000000000000000000000000000..c4bdd556d8750e46c9457aab7a849c13625e5804 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a2a8d3c93367a33b250c.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 8, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a61d8247a227bdcc16f2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a61d8247a227bdcc16f2.json new file mode 100644 index 0000000000000000000000000000000000000000..f0d74fb21c4ddf85f4b619ec851da0b449e9934d --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a61d8247a227bdcc16f2.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 32, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca9ec6b16e1a62dbd649.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca9ec6b16e1a62dbd649.json new file mode 100644 index 0000000000000000000000000000000000000000..6a76e68941ce60f19fa2f6854254c6e52d02f640 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca9ec6b16e1a62dbd649.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/d75fbb50baa6a3b15792.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/d75fbb50baa6a3b15792.json new file mode 100644 index 0000000000000000000000000000000000000000..72862602592826178c33364062d75c57d4e63cf4 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/d75fbb50baa6a3b15792.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/de7f8e3eb7e911ee8559.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/de7f8e3eb7e911ee8559.json new file mode 100644 index 0000000000000000000000000000000000000000..c8f638043142eea0ce1c4d46bec597647f81d487 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/de7f8e3eb7e911ee8559.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/eec3579b9122a80133a1.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/eec3579b9122a80133a1.json new file mode 100644 index 0000000000000000000000000000000000000000..1e62d97b754c28d5d38a5b14d60203cc413bab77 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/eec3579b9122a80133a1.json @@ -0,0 +1 @@ +{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/06d7927d0a3a008f60b6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/06d7927d0a3a008f60b6.json new file mode 100644 index 0000000000000000000000000000000000000000..8d9a9efc37410a41298d0cce65bcfe0c41947604 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/06d7927d0a3a008f60b6.json @@ -0,0 +1 @@ +{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/12212376258a9fed88b2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/12212376258a9fed88b2.json new file mode 100644 index 0000000000000000000000000000000000000000..a8ffb1006893726a4b83ed0978c9033c19b3c397 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/12212376258a9fed88b2.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/15f566c81d1f67f6fd2e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/15f566c81d1f67f6fd2e.json new file mode 100644 index 0000000000000000000000000000000000000000..444175f1611896b536a80f7bb5bcef7d39d0875c --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/15f566c81d1f67f6fd2e.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 8, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/192bd7f0468f78103585.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/192bd7f0468f78103585.json new file mode 100644 index 0000000000000000000000000000000000000000..5b074670d9bd7212b7d649f0db550918ccc9e1b4 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/192bd7f0468f78103585.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 8, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1a58aae2f6a4ddf4015c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1a58aae2f6a4ddf4015c.json new file mode 100644 index 0000000000000000000000000000000000000000..2c461588d489ba62146cd9d980546324d54e9585 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1a58aae2f6a4ddf4015c.json @@ -0,0 +1 @@ +{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1dd1302a33c9e69730d2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1dd1302a33c9e69730d2.json new file mode 100644 index 0000000000000000000000000000000000000000..af539ae303508a9efe3986c30f8988afc8c96bfa --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1dd1302a33c9e69730d2.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 16, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/21bbf61928931f72378f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/21bbf61928931f72378f.json new file mode 100644 index 0000000000000000000000000000000000000000..4c82221ce76d8c25feeaf493782ed0bccb722e27 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/21bbf61928931f72378f.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 1, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/236943bb2fa2da236c92.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/236943bb2fa2da236c92.json new file mode 100644 index 0000000000000000000000000000000000000000..fb4e0a917d82a4718bfd64935dfc90a1728131d1 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/236943bb2fa2da236c92.json @@ -0,0 +1 @@ +{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 16, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/258d4c94617de7988dfd.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/258d4c94617de7988dfd.json new file mode 100644 index 0000000000000000000000000000000000000000..c38b4656d31d8f54c5f1c03bd3316ac57a78a2e1 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/258d4c94617de7988dfd.json @@ -0,0 +1 @@ +{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5141bf1fc4c434ce1ea7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5141bf1fc4c434ce1ea7.json new file mode 100644 index 0000000000000000000000000000000000000000..dc3b3f34e83fe233c5c1dd80af5b07a822eb81e2 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5141bf1fc4c434ce1ea7.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5384fd5e39aeb90be6d5.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5384fd5e39aeb90be6d5.json new file mode 100644 index 0000000000000000000000000000000000000000..1ac313f7c77e95ba82f3d50c71ab6da08cf63d0a --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5384fd5e39aeb90be6d5.json @@ -0,0 +1 @@ +{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 8, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/67dd8bb21d625e22cd5b.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/67dd8bb21d625e22cd5b.json new file mode 100644 index 0000000000000000000000000000000000000000..1737c97088e03d46e16b1290cb0f7a7ecfa0a99f --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/67dd8bb21d625e22cd5b.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 32, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/81be8abab2484836ea97.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/81be8abab2484836ea97.json new file mode 100644 index 0000000000000000000000000000000000000000..ebb90c65929cb44220ef9b63127a1debed153012 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/81be8abab2484836ea97.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9e73f279a5220e46dcf1.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9e73f279a5220e46dcf1.json new file mode 100644 index 0000000000000000000000000000000000000000..a6735f184b2da971105c7c846194a388d2cfffea --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9e73f279a5220e46dcf1.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9eb34c5c51dae5739571.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9eb34c5c51dae5739571.json new file mode 100644 index 0000000000000000000000000000000000000000..41ab542aca365efd0911c9ba1cc82abd1e95adbc --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9eb34c5c51dae5739571.json @@ -0,0 +1 @@ +{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 16, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/0eb6d808c895dd98f1d9.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/0eb6d808c895dd98f1d9.json new file mode 100644 index 0000000000000000000000000000000000000000..dc4e71f6b73c2e8e64611200bace5fbd55063e3d --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/0eb6d808c895dd98f1d9.json @@ -0,0 +1 @@ +{"torch_dtype": "float32", "is_decoder": true, "architectures": ["OPTForCausalLM"], "bos_token_id": 2, "pad_token_id": 1, "eos_token_id": 2, "embed_dim": 16, "model_type": "opt", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-OPTForCausalLM", "checkpoint_revision": "190d1f4fc0011d2eaeaa05282e0fbd2445e4b11f", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "vocab_size": 50265, "max_position_embeddings": 100, "num_attention_heads": 4, "word_embed_proj_dim": 16, "ffn_dim": 4, "hidden_size": 16, "num_hidden_layers": 5, "dropout": 0.1, "attention_dropout": 0.1, "activation_function": "relu", "init_std": 0.02, "layerdrop": 0.0, "use_cache": true, "do_layer_norm_before": true, "enable_bias": true, "layer_norm_elementwise_affine": true, "_remove_final_layer_norm": false} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/244d78257ade535a7f03.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/244d78257ade535a7f03.json new file mode 100644 index 0000000000000000000000000000000000000000..43b4233503304a5de09277fa3d642923826a1475 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/244d78257ade535a7f03.json @@ -0,0 +1 @@ +{"torch_dtype": "float32", "is_decoder": true, "architectures": ["OPTForCausalLM"], "bos_token_id": 2, "pad_token_id": 1, "eos_token_id": 2, "embed_dim": 16, "model_type": "opt", "vocab_size": 50265, "max_position_embeddings": 100, "num_attention_heads": 4, "word_embed_proj_dim": 16, "ffn_dim": 4, "hidden_size": 16, "num_hidden_layers": 5, "dropout": 0.1, "attention_dropout": 0.1, "activation_function": "relu", "init_std": 0.02, "layerdrop": 0.0, "use_cache": true, "do_layer_norm_before": true, "enable_bias": true, "layer_norm_elementwise_affine": true, "_remove_final_layer_norm": false, "neuron": {"task": "text-generation", "batch_size": 2, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 100, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "hf-internal-testing/tiny-random-OPTForCausalLM", "checkpoint_revision": "190d1f4fc0011d2eaeaa05282e0fbd2445e4b11f"}} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/f3adfe5a9c79b5a36fd7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/f3adfe5a9c79b5a36fd7.json new file mode 100644 index 0000000000000000000000000000000000000000..66b101aaeb2d861a356a107afb2aef0b7115a83d --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/f3adfe5a9c79b5a36fd7.json @@ -0,0 +1 @@ +{"_remove_final_layer_norm": false, "activation_function": "relu", "architectures": ["OPTForCausalLM"], "attention_dropout": 0.1, "bos_token_id": 2, "do_layer_norm_before": true, "dropout": 0.1, "embed_dim": 16, "enable_bias": true, "eos_token_id": 2, "ffn_dim": 4, "hidden_size": 16, "init_std": 0.02, "is_decoder": true, "layer_norm_elementwise_affine": true, "layerdrop": 0.0, "max_position_embeddings": 100, "model_type": "opt", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-OPTForCausalLM", "checkpoint_revision": "190d1f4fc0011d2eaeaa05282e0fbd2445e4b11f", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 4, "num_hidden_layers": 5, "pad_token_id": 1, "torch_dtype": "float32", "use_cache": true, "vocab_size": 50265, "word_embed_proj_dim": 16} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22/inference/llama/meta-llama/Meta-Llama-3-70B/4f0264dadde55c3f2f3c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22/inference/llama/meta-llama/Meta-Llama-3-70B/4f0264dadde55c3f2f3c.json new file mode 100644 index 0000000000000000000000000000000000000000..9ebcf04da1bcad4307026db088875ad057444534 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22/inference/llama/meta-llama/Meta-Llama-3-70B/4f0264dadde55c3f2f3c.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 8192, "initializer_range": 0.02, "intermediate_size": 28672, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "meta-llama/Meta-Llama-3-70B", "checkpoint_revision": "b33784c5adf6e4b1a60d041da74e83fd438d67cd", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 64, "num_hidden_layers": 80, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22/inference/llama/meta-llama/Meta-Llama-3-70B/c85c3e7900a7071eef5c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22/inference/llama/meta-llama/Meta-Llama-3-70B/c85c3e7900a7071eef5c.json new file mode 100644 index 0000000000000000000000000000000000000000..c685b5732f9fb6491478ea08042e14639e4e2f9d --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.22/inference/llama/meta-llama/Meta-Llama-3-70B/c85c3e7900a7071eef5c.json @@ -0,0 +1 @@ +{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128001, "hidden_act": "silu", "hidden_size": 8192, "initializer_range": 0.02, "intermediate_size": 28672, "max_position_embeddings": 8192, "model_type": "llama", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "meta-llama/Meta-Llama-3-70B", "checkpoint_revision": "b33784c5adf6e4b1a60d041da74e83fd438d67cd", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 64, "num_hidden_layers": 80, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256} \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_07830cb62c4a0ba392e1+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_07830cb62c4a0ba392e1+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_07830cb62c4a0ba392e1+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_07830cb62c4a0ba392e1+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_07830cb62c4a0ba392e1+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1cc4b47c1a8635006dfd5572c022ba19702d832e --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_07830cb62c4a0ba392e1+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b183450a9a7d975ed538491ab1b1ba6301c187f5bbc7384a45c66fc912fe869a +size 752116 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_07830cb62c4a0ba392e1+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_07830cb62c4a0ba392e1+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7e30c47b06c2904f1da569b839757ec5e5c3e8e7 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_07830cb62c4a0ba392e1+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:accd4638c0aab7ec2148631685145377d02c8015664252e000793b25112261d8 +size 8274944 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_2624e80c4e53734fa89f+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_2624e80c4e53734fa89f+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_2624e80c4e53734fa89f+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_2624e80c4e53734fa89f+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_2624e80c4e53734fa89f+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d8b2e3276ec650ae7d8d35d7ca15a4b4ebf46ee2 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_2624e80c4e53734fa89f+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed60d1ff159be97aeb36db402d658e2accdcb9f13b9220b9659f82ba691f7f6 +size 743294 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_2624e80c4e53734fa89f+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_2624e80c4e53734fa89f+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6c757d3458b1cbe8c992780b3ed03db4f797cb26 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_2624e80c4e53734fa89f+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d20e623db84e203828343ab7f4e752226cb50017ff9b01a8b2b7d46fb2d5d33a +size 15023104 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_354db23f0144f74c570e+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_354db23f0144f74c570e+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_354db23f0144f74c570e+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_354db23f0144f74c570e+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_354db23f0144f74c570e+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fe2ed717090f72d56c4ec889a583fabd5d7351ca --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_354db23f0144f74c570e+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f43673ca24cc01e4cc8f2d52dae1e7ac405e292aeb5c14deb10993c99b6276 +size 743294 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_354db23f0144f74c570e+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_354db23f0144f74c570e+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..13f672eced264a960a17d7f2b7e231ccfedf7dcd --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_354db23f0144f74c570e+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e1e3eb7c1af89a8674dec3d5aa189c9a72258bbc3b9e6352fcca133c42b918f +size 27700224 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_35ee985c28310b8a296e+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_35ee985c28310b8a296e+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_35ee985c28310b8a296e+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_35ee985c28310b8a296e+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_35ee985c28310b8a296e+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a493373608dd2e2a8bf9705a705ee0692ee04b10 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_35ee985c28310b8a296e+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8df95e5492ce7b507bc9b7535205ff0ecee97b74aaaf74d8d58190c12d56792b +size 958301 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_35ee985c28310b8a296e+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_35ee985c28310b8a296e+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b088b6347f9e04285c321019374761b8d9a1159f --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_35ee985c28310b8a296e+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c2b9162af9d2ee44fc2f3a6985a18600a4a452b38bc49ca87ec5cd23a85cc0a +size 15197184 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_49ced6c57ff86101015e+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_49ced6c57ff86101015e+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_49ced6c57ff86101015e+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_49ced6c57ff86101015e+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_49ced6c57ff86101015e+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e36911b2114cf5887042d2a35c0b45361c824f18 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_49ced6c57ff86101015e+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1da37666624126d8b7170c10ada7df117bb299b51f28901590a55ac28c491b +size 743294 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_49ced6c57ff86101015e+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_49ced6c57ff86101015e+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..574de64cf2feb18fd9b0f0aff0d7c453c7cd9172 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_49ced6c57ff86101015e+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:116b42fcc7b37aca4fec84a2952b7f6c7e8a96995a5f8ee7b736a535ddf7cd74 +size 12319744 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_5310c423645d56c069a1+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_5310c423645d56c069a1+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_5310c423645d56c069a1+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_5310c423645d56c069a1+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_5310c423645d56c069a1+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ba6aa6c03e4e5e71d6649e88747c9e134968bdd4 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_5310c423645d56c069a1+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ef3bdbf3ec40fe1d7577089645892a9a0c71608a87f98085ad62262eb0be61 +size 752116 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_5310c423645d56c069a1+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_5310c423645d56c069a1+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ae836a76893b43ee4a7a272f4775962557e25ac4 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_5310c423645d56c069a1+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed1aae4118e793e59dcaac3eb25195a587fbc43bf8685a5f318e917c603f192 +size 8285184 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_56710e033c6feca053ca+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_56710e033c6feca053ca+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_56710e033c6feca053ca+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_56710e033c6feca053ca+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_56710e033c6feca053ca+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..12e0e184c27f1a1321808656d80dabd7a31a4917 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_56710e033c6feca053ca+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f10419b7b2778a8d22fe69c4ef9bd66093c0e6976d1d6a8814d1906f20a4ce +size 743294 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_56710e033c6feca053ca+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_56710e033c6feca053ca+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fe4dffd54819430b642c2462e9667920370a4e9c --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_56710e033c6feca053ca+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3074ac6639d5a9c402732fe2f6d3824e8f83a0bf1c1e59aa3c50682d820ba024 +size 11387904 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_747d55f073445352daf0+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_747d55f073445352daf0+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_747d55f073445352daf0+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_747d55f073445352daf0+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_747d55f073445352daf0+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6ea0075fc859b90d6b4a32c57eec7ca73d1313a8 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_747d55f073445352daf0+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:220e62e14bbb36da6e8982a2fe6e7db5fff44c2fe641bf1e1e19785df0bc32e5 +size 752116 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_747d55f073445352daf0+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_747d55f073445352daf0+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4d4ba283276075ef486a172ba8525a5a50cc8f16 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_747d55f073445352daf0+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bd4b4317a0496400ea28162b7a19d865aa0650e6741122ac9d6aab2a1d546a9 +size 8520704 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_7653a694a1679de6e78f+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_7653a694a1679de6e78f+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_7653a694a1679de6e78f+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_7653a694a1679de6e78f+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_7653a694a1679de6e78f+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..933040f68d415df648256043ebeaec47332b3453 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_7653a694a1679de6e78f+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b661835f7c78da97adf4c6b3715fbb47076cdef33e9fb665cae332c6b4ea386 +size 752116 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_7653a694a1679de6e78f+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_7653a694a1679de6e78f+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f183e1f9f83dbb8b15339f9bb7cf53380b10dedf --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_7653a694a1679de6e78f+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896866d4d18d6b7870625dcb5bfca050b3a4c4d765a25379acceffe19cd56259 +size 8776704 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_7abaade23f23cc95d209+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_7abaade23f23cc95d209+2c2d707e/model.neff index 801b24437b17755d8c008a711d07a90ffcf8e9f7..0723ada36f42331f0227e8f1b78e69682c73c7a3 100644 --- a/neuronxcc-2.13.66.0+6dfecc895/MODULE_7abaade23f23cc95d209+2c2d707e/model.neff +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_7abaade23f23cc95d209+2c2d707e/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cf78c85ab7806cc2075ff607e2b006d2191297606973107073641b32c6804b2 +oid sha256:29dfd77ed3670a75c09a7c3c6987384b80785b32af1444a41ac687501836fee7 size 9606144 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_b48f6502874ac8a8f5bb+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_b48f6502874ac8a8f5bb+2c2d707e/model.neff index d5d8176c4260baf6803f32eb7937e25b8c2cf5fc..e8ab277342af0b897819df7eb4add3ac40bbb906 100644 --- a/neuronxcc-2.13.66.0+6dfecc895/MODULE_b48f6502874ac8a8f5bb+2c2d707e/model.neff +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_b48f6502874ac8a8f5bb+2c2d707e/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e100a1f39124851ac66beac14774e9e19f6ae128b83cc2bf8f35193f188b07c +oid sha256:abfbcac10c4cc4366e2acb0582265a278b738a69d6a0462aab3b70ef62cef1bb size 10497024 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_b8581e11932985674beb+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_b8581e11932985674beb+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_b8581e11932985674beb+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_b8581e11932985674beb+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_b8581e11932985674beb+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7b3eb6b62425e59923cc5704e7ec038b197cb713 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_b8581e11932985674beb+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b7121eb33bdf2b9d5d95999f30d06ef30f26d6e65d3b901785d84f9c4f9697 +size 844626 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_b8581e11932985674beb+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_b8581e11932985674beb+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6a9db0fda47e458c32318eaffb03f1249b5f5641 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_b8581e11932985674beb+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f058cca23b88b801ccfa76b924f080a0ed6dc8fd8e656d6641cdd93070f9940 +size 9718784 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_cb4f665c1e3342f5d241+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_cb4f665c1e3342f5d241+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_cb4f665c1e3342f5d241+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_cb4f665c1e3342f5d241+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_cb4f665c1e3342f5d241+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..da956dac503a431c4887f3b1ba2649f55aed399e --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_cb4f665c1e3342f5d241+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:717d2e9176cefe024c7e40500416a35d99d0b92add2aa4e160e3c9dd5c90f3ce +size 752116 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_cb4f665c1e3342f5d241+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_cb4f665c1e3342f5d241+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..07e38a1477d15be7707bb2ba5918eafbb2d907e2 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_cb4f665c1e3342f5d241+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cf99635c435a8b60453147454dd070b8c62c1e01c0b631dc9dd331de14283d +size 8397824 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_d366121804055516f63f+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_d366121804055516f63f+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_d366121804055516f63f+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_d366121804055516f63f+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_d366121804055516f63f+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..db9818f4d4c292fb3ffdd294b7210233c9c4e008 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_d366121804055516f63f+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af35a1b3671eb03d4bc1419b4f5d06ef674a645812a132e5d28dbdfcb2d64b72 +size 752116 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_d366121804055516f63f+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_d366121804055516f63f+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..832f51300170e7029c0ecfc3addbb9780c0ac266 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_d366121804055516f63f+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245981e6cf8b017a7c5ff74cf199a76f396c94e5d980c62d4dae5e5e20e3665a +size 9370624 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_da34f50cf6e3a3fbbde0+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_da34f50cf6e3a3fbbde0+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_da34f50cf6e3a3fbbde0+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_da34f50cf6e3a3fbbde0+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_da34f50cf6e3a3fbbde0+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..87dc3d0b5894d97639be9b882ba887dee490a89c --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_da34f50cf6e3a3fbbde0+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c77c86304217aee491ceb8ec0e00b47c86acf8fad6269645be6aba52f961f9a1 +size 743294 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_da34f50cf6e3a3fbbde0+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_da34f50cf6e3a3fbbde0+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3d4b6377634b713c5531a6b2c88beb550ca14e96 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_da34f50cf6e3a3fbbde0+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a358d5a88597b6c88cf10e0f2b2494446056e9167ef7ebaf68b0a1d6b64a1b67 +size 4844544 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_df19689e1c1380996cd5+2c2d707e/compile_flags.txt b/neuronxcc-2.13.66.0+6dfecc895/MODULE_df19689e1c1380996cd5+2c2d707e/compile_flags.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd72d102654c531414d7f703987c9729e8cb438 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_df19689e1c1380996cd5+2c2d707e/compile_flags.txt @@ -0,0 +1 @@ +--model-type=transformer --auto-cast=none \ No newline at end of file diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_df19689e1c1380996cd5+2c2d707e/model.hlo_module.pb b/neuronxcc-2.13.66.0+6dfecc895/MODULE_df19689e1c1380996cd5+2c2d707e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ee170145b7f9d4b1de64727c0899bd990170fd12 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_df19689e1c1380996cd5+2c2d707e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6374d7d9c5d400b722b03acab2c8d85cb1f0ec860a06f179689a5eeef3273887 +size 734223 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_df19689e1c1380996cd5+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_df19689e1c1380996cd5+2c2d707e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..82d8438b9e144365df9bc6f395423cbff4a657b9 --- /dev/null +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_df19689e1c1380996cd5+2c2d707e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a6c2d4983aa3d8983137f842f8c72a167c606cb650ddf47dd63dfe323e67245 +size 11930624 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_e3ef39f42931d27282b0+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_e3ef39f42931d27282b0+2c2d707e/model.neff index cefa79c23bb9d72543fa06eda8fddba644d257f7..b354b373266d2c7a8f177b410cd643001d4429ab 100644 --- a/neuronxcc-2.13.66.0+6dfecc895/MODULE_e3ef39f42931d27282b0+2c2d707e/model.neff +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_e3ef39f42931d27282b0+2c2d707e/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eea54fc3b8391c171d440a67d4d3548f522b54dfa767c512aaca55510a8f4dd9 +oid sha256:20b9ea1c0b50b7da5b2aa11c351b050f1f8f52b9f060ece18091cdf7545aa617 size 15770624 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_e7dd81fdd13e1e7d9a5c+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_e7dd81fdd13e1e7d9a5c+2c2d707e/model.neff index 6a04feeefc2bdc129fe6576c30b5c310ee24c337..89f01d16134714685401cb44ff32a26c63deb134 100644 --- a/neuronxcc-2.13.66.0+6dfecc895/MODULE_e7dd81fdd13e1e7d9a5c+2c2d707e/model.neff +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_e7dd81fdd13e1e7d9a5c+2c2d707e/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87a210ffa48a264bd7529b9ab102872f51e67833c6186d62f429bedbbda3a216 +oid sha256:f2183834c28e845c2f089587c357faae4dcbe7bbb9e73aa7181d08480073ca65 size 27331584 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_f346261058789fe5d56a+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_f346261058789fe5d56a+2c2d707e/model.neff index caf34067cbd288db37662b48636d8a223ffb1c9f..9c16a005008393b8ef59b0855fbe1a34e4bd797a 100644 --- a/neuronxcc-2.13.66.0+6dfecc895/MODULE_f346261058789fe5d56a+2c2d707e/model.neff +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_f346261058789fe5d56a+2c2d707e/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2167d688a780bac598db04036504f66f6102e6b551376f823abfdddb64719bfa +oid sha256:8f4ca88311b46db7fb08764b296172b083357e0f90d6a308f0beddeabbefc771 size 27331584 diff --git a/neuronxcc-2.13.66.0+6dfecc895/MODULE_fd99f71acc89fead8df7+2c2d707e/model.neff b/neuronxcc-2.13.66.0+6dfecc895/MODULE_fd99f71acc89fead8df7+2c2d707e/model.neff index 02e742c139ea11f011b6a64c5fd559325b64c9a9..3143745b5beac58c9b3734baabafb4af1d6f2053 100644 --- a/neuronxcc-2.13.66.0+6dfecc895/MODULE_fd99f71acc89fead8df7+2c2d707e/model.neff +++ b/neuronxcc-2.13.66.0+6dfecc895/MODULE_fd99f71acc89fead8df7+2c2d707e/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52a119fe64ae5207b13f02f8577981b31a9e516b708f6297fe8da717eb31797f +oid sha256:9f777b54ba05cbaf3ce1babce3c20bc2c2388aeec0bb4867bcf98f1a3478657c size 18596864