Upload folder using huggingface_hub (#1)
Browse files- 94af3249ee7d2f6b8be2c389a2a532e25abd3e942df5aa49194f06624d6ec7df (df15c8e7968db4a15de24735690f4a788b59ee08)
- 62ec0adb2e6eb61b84bf5753b8d55937675316bafc0e2fdf25d11d6795dfa25e (82f99016ca11306c72cf7870c5b47973a7025f84)
- 412d6cdaa4d4d74e8381bb114a41f7c6801177a2eb8de0cedd99f6dd1a6dbb6d (6c7dab0ec7726f28301dc8bbfa560ff13fdf7004)
- 848585d11936c4ae112bf93bdb59e5c759aa2a948a29f5b348a21692483ded0d (31c80b91151cec920f1e1e84442511cc0471a8be)
- b6261dfaf7c3a8aeabf16b4204d3fe0ab6a2973320e9698aa480f4c4fefc0bc8 (dc4a1e2d9e3026ae69a7331847617986f2b06484)
- b1190a762b308fca274cd5b94f07c056b291b57eb45e64c23c9f3eb0cb2eccb2 (e0cd4ed8bb9b1de55367bacd186b440055e3bc9c)
- 5c13dc3596290b66b847eb68af8eee320d5cdbedecd932ced9db3676f12116dd (7f5e4c9e784b130b829c0c3b06266221e6b80172)
- 63dc6b21fa0cb02b631da79d5c11168ccc310d4a7d03a71af390090340092d3c (4b295ca4ce303037c72b736676ac2627ebc91595)
- ebfd399034a4ddc90b959eb358013d1247f31ec237ee8b63fdf29bfaa0d440b5 (01ad1d15d43903ff0abd112ae43cec38e397663a)
- eb0a6533b36e84c8b2e7a36ebf5e5923b543c2a7653d628976333cbdc9e80aa0 (a6a3a3553a3406433b1adaa92c696fc41d3bec15)
- c20665c5f23317785cec4c9d0f7da76320d5236daba6c1a221334726d790eadd (1794d11175fcd29d7d50f1a897b64cfd9fab0137)
- ce858e8dc9990be378e9eb741afd4893a9a815a0e7f001cf9fbbc7a7a78984e9 (d1c1bd8260cb043c6c7ac7c6a295f49f3a42d7f9)
- ba3c47abdaaf7b18127dbedabac08e41a5615f84313f6e51150a6eb38ab4ec92 (1d4444e8c62513a7db86aa98c58d43dd86f2c250)
- 2a1440f1980ef647bc431f7d01a7fecd4311d733f161bad6670c4c06b3f93b02 (fbf5e925072bb59236fad297b687cfaad63bebec)
- 121a0045e764e701c2a4bb1b67be01fe422c5935e71f312e7cffdddf8c3ce91e (caebecd7f93cd533d7f519031cd0214d93e5d428)
- 00684905142fd5ca6093cae55cde0a0bf816a296294347cc63d59edfb648aed1 (f7e9ab61d55afdadd4ad9138a9f0f50da21c94bc)
- f2fca6496e27649572bb3d50976660aa6e1212bb29517e8bae7ea05766b57ad8 (40c9eeb13d757524bc0ba8e264ec44009e3a6ea0)
- 67271d62879cecdfd5ebe7180421781a589d111a944ae6915c018e1f2400884d (7c7cd95f70aae7657a1b8de6fc39ac09b1cce869)
- 3997b6ff6c69cb8c4fb2e8595c5d9aeb4cb813a9c26c482ad479d016f7d7e89b (ce4de135747ddf5e32415b6dc224d765b4e5ee2e)
- 09e08699082b878ed8d9a54acf2762db31c24d7edfa266a20640c042056ce41c (def6b545391b37f6e7d1eca6fa9a4d09f6114805)
- 9c2d4c0e364dbca0fff46e6fcc881682d7de168f3918563849a7fcf712d63ec0 (0a18b9779a44f50c0f9ed00cc4cb7e9dd5fd39de)
- 901c3c78b7f74b4022cd2745c381b08924dee6d5bb487b4eca4fe6ba382e8208 (7acd1f7688a10057cc3e2dcb68e710598a51d073)
- 8004e80664ae55918df6a582838a375c00a08fcd2d02a1d126f39c3a0742a36d (67aa82f8bfc304e58a2c17cdcf94970662d9567f)
- f8949fda5a613104702af6da5971c96c7fb3edd8c60d3eace9a6d55643480cce (3a03090755c5346ee85df97364d3a1bd736083b7)
- ebb0eb0f116d8704168107e0dc6b4448e1345782dee8d61d0a64cc46b0799466 (9fbe051ffbdc5e7bbd215575df943022018de970)
- 8785f45fc040fe79bd5cf21743c792e45d78f3197270e06da2d8c45e20533413 (698c045206713048d62b2c4d718b6a6ba99756ef)
- 741bdd74d3f2a553d717a94b1f0cd884ea959819cc455a33747ec14224ceace8 (1e9ced62984a72df54e2ad4f713e0a18c8b5ed68)
- ae44b464198fb42818814052f3f3b3881a7b33bbc5ebe633c1a71df83401fe38 (75d8e2058fcdd70fa913b1b28086e2fb9e8f86d4)
- 179c55eaa9f2e961752ab2fa32c63faa05f1e161396368566e663d641e9b2e95 (8345d3c8ab52c5fd8f25c4d1e9fd7f8baaddfc60)
- e2957fbd388fac7a32132f0e1b55975cbb769ba39f85a3e0db65af930604a609 (832219a75f6e30933a429bd59d0ddd7f2ffcd7d8)
- 6705812196fc694d4e1e9cf438d0e8e67ead54ecf0f0af5bdf5bf8e8f1e0eea1 (4d5629fe2ab00c24555feaa8793388d29463a265)
- 52837442612124c9c9bb5233b20375f107744a77620856b6fc0f1a60cc2b8c85 (7e871ffdb471d4cf98676d3ba288245692e12464)
- 13cafd840b0f1f35d94e8f743b29819b9b6763093851cd0a46a91aa1432e23be (a33e51a91c9f009a9d8fcb6f7bc107b202b67995)
- 3350024eeabd99f4dccc2c3824fc11da46ef3284199de7247fd1a079621696d7 (714c0a61e55148e9abeb0c26b00bb790e73ad78c)
- c9c9a18ead97df737c71124e5fcd02ac6e3360d0cf917c8a87e328f9c9fa1cc9 (e101b0649cd2ca454121c5c350aa85f04be16287)
- a103c940c961368b32ee2b306570bea3c6f02f223e3057d7a5e1ffb85412c700 (2b39a6facf0c93c37aa27b6a69ba31849b415f65)
- 372638ea9803605831171cac9add101fa21e78eb828b5d6c0cb5f794863e02e6 (b5436a2e078fe263ecef51ebc6dd8df2c21e557a)
- 039241a347695a6cbe01646105f544b8fafdf2c6b57a948bebe02b3b242e70c7 (b5ab169dbb361605c2956d14c90d38dfe16a24eb)
- d3464a3dda25bed02f33528d17eefa7a6174e2f310a42ff51cf081e59e18d00d (442f59e7e8208c650d6bcb2fb8a5e1b52d753f6b)
- 5860194d83aa6c31789555105659e6641529d8bf2ab998da9454a910a9ae2a2a (9f5ff064f5c0a48845b5b50b334202c81c1f39d1)
- fb8112f30fe67d8ed6127d87a546eaa5e2f4fcb41b1e60b0b615aaa13b75df20 (3f907711104336280aee10d3c92b400113b24115)
- 7399f9197a0ed39a9ae6fdffea87ef092d309c6f27dc096895929f20a32ab94f (c82f83089476a1ebef4bea363ed74562ae744140)
- ff1cf3a250ca5542cbeba0177fa3ff0c8e3cd8b106bce020d561bcc50c5255e7 (ebbb508ed49694f5a73451bd1c302e6005002777)
- b7340f108dfd263e1457bdf8cbf0747d97bf50e221a1b00ad9f6e066d1516ca3 (fee1e0f7582e1058d977d3c13f66a4ac5cfc5aaf)
- c64d4878e5b08398e3220973cfbcf653d51c7723c7945afaa5ac3189a7f866b3 (2335ab2c7a187d1bcc66df3ea354d098c0a2db10)
- fbe73ebc9846c7f284815109a4d5078243d7317feeaf9b18e4c1f892f565117f (0bd814d8a989082a4ecd8b451af509941d1c840b)
- cd5a0c152656f885ef2e0b70b9ef200ab1c1472f4d928e94b7a3098ac804b6bc (7b217ed8ea096c79bc41b61acb5938a259f38cad)
- a8cadbf0fe87dd61e102196e337da7b1a6e25ee009025ac6b159a583972cb55a (97c86e9dbc0be8598fbfa21a00e426529eb3df49)
- ac825f5998dec81cb71466d4bbc5c03cc83db88a27a9e870998928d45c245e58 (8cdcb62b0b3cfeb7abdd0d1e331c220deecd1dea)
- d5a8550526c35940bfcef1f0fb3af4065b96b847115e02c7d268ace2c98c0af3 (a73576e9b03235b4c4c657e6484eb5a68dfd7e93)
- fba5c2437f8af7d91bf3c30af0e9776747d0888ce69dcd3edd81c1097e7474e0 (7a2a960c2f1411cf506bf9181d1e900b48273e04)
- 90bb393b644f27c80abba53566ee0c8325ade8e86a08efc8b1f815a4fe6e8cdd (8948e27f1942cb9237360feb40f37061c2ad49a9)
- 3648ab95925cf7ea5ecba4525589ba464f4c321beca1f47b15154f075ba87fa1 (101e11c5a860fc7e8346c4da5f7bda4088ffbb72)
- fe904876f884f632ddcd034acec523bfb870b804775c3ed1a40389cd9dc67a7d (e8aae75634eb0b35aa811ecfad83cd123bec3c0e)
- 0024f1ce915d6e407473592476b05038f8cdb0709d2a4aa331c9f30a00d9e487 (92efb423c37759fd50aa0aea1db9e4c12f3aac98)
- 5998b27b46112b6ce6f2df515c9a1b9d212e3e75ffb7c35966680964b85a449d (a4a5d884f06a5cecee1395ff7a2d89d3283d646c)
- 61e15c2eff9b3564b8f076bcb650b9e9847d7aabc386eda7d77f10e8c49f1136 (856f0b2b9826d2b51bf171ea647d92e90fca715f)
- 3a2f406c7c2cc7de0cf4a198623824c979809a74ec4f26dad2aa47189b42215f (bb381012b149bd75e965712581ce299c0f544612)
- de3669e9b655c76dbc22feeee91d4caea1fbf028c9aacb565d435fdf2319c5d5 (8005c4cc90add8c3811683b2a55f0bb411165bc4)
- f0868b81529d08b281ef5b27f1d312282532a034d546729fd5d11129b47a1a51 (d4ba2af71396c2e88dcdcc7591e89b7b197d0d0d)
- 76d9822d3fe37ddc05aeede0658bd020de5c5917050cde9098ec4db175c12956 (1f64b191bbbca53912f938ed10b67e866c54a59d)
- .ipynb_checkpoints/config-checkpoint.json +29 -0
- .ipynb_checkpoints/generation_config-checkpoint.json +8 -0
- .ipynb_checkpoints/model.safetensors.index-checkpoint.json +730 -0
- .ipynb_checkpoints/special_tokens_map-checkpoint.json +24 -0
- .ipynb_checkpoints/tokenizer-checkpoint.json +0 -0
- .ipynb_checkpoints/tokenizer_config-checkpoint.json +40 -0
- config.json +29 -0
- generation_config.json +8 -0
- model-00001-of-00061.safetensors +3 -0
- model-00002-of-00061.safetensors +3 -0
- model-00003-of-00061.safetensors +3 -0
- model-00004-of-00061.safetensors +3 -0
- model-00005-of-00061.safetensors +3 -0
- model-00006-of-00061.safetensors +3 -0
- model-00007-of-00061.safetensors +3 -0
- model-00008-of-00061.safetensors +3 -0
- model-00009-of-00061.safetensors +3 -0
- model-00010-of-00061.safetensors +3 -0
- model-00011-of-00061.safetensors +3 -0
- model-00012-of-00061.safetensors +3 -0
- model-00013-of-00061.safetensors +3 -0
- model-00014-of-00061.safetensors +3 -0
- model-00015-of-00061.safetensors +3 -0
- model-00016-of-00061.safetensors +3 -0
- model-00017-of-00061.safetensors +3 -0
- model-00018-of-00061.safetensors +3 -0
- model-00019-of-00061.safetensors +3 -0
- model-00020-of-00061.safetensors +3 -0
- model-00021-of-00061.safetensors +3 -0
- model-00022-of-00061.safetensors +3 -0
- model-00023-of-00061.safetensors +3 -0
- model-00024-of-00061.safetensors +3 -0
- model-00025-of-00061.safetensors +3 -0
- model-00026-of-00061.safetensors +3 -0
- model-00027-of-00061.safetensors +3 -0
- model-00028-of-00061.safetensors +3 -0
- model-00029-of-00061.safetensors +3 -0
- model-00030-of-00061.safetensors +3 -0
- model-00031-of-00061.safetensors +3 -0
- model-00032-of-00061.safetensors +3 -0
- model-00033-of-00061.safetensors +3 -0
- model-00034-of-00061.safetensors +3 -0
- model-00035-of-00061.safetensors +3 -0
- model-00036-of-00061.safetensors +3 -0
- model-00037-of-00061.safetensors +3 -0
- model-00038-of-00061.safetensors +3 -0
- model-00039-of-00061.safetensors +3 -0
- model-00040-of-00061.safetensors +3 -0
- model-00041-of-00061.safetensors +3 -0
- model-00042-of-00061.safetensors +3 -0
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sequelbox/SpellBlade",
|
3 |
+
"architectures": [
|
4 |
+
"LlamaForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 8192,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 28672,
|
14 |
+
"max_position_embeddings": 4096,
|
15 |
+
"model_type": "llama",
|
16 |
+
"num_attention_heads": 64,
|
17 |
+
"num_hidden_layers": 80,
|
18 |
+
"num_key_value_heads": 8,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"pretraining_tp": 1,
|
21 |
+
"rms_norm_eps": 1e-05,
|
22 |
+
"rope_scaling": null,
|
23 |
+
"rope_theta": 10000.0,
|
24 |
+
"tie_word_embeddings": false,
|
25 |
+
"torch_dtype": "float32",
|
26 |
+
"transformers_version": "4.36.2",
|
27 |
+
"use_cache": false,
|
28 |
+
"vocab_size": 32000
|
29 |
+
}
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.36.2",
|
7 |
+
"use_cache": false
|
8 |
+
}
|
@@ -0,0 +1,730 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"total_size": 275906592768
|
4 |
+
},
|
5 |
+
"weight_map": {
|
6 |
+
"lm_head.weight": "model-00061-of-00061.safetensors",
|
7 |
+
"model.embed_tokens.weight": "model-00001-of-00061.safetensors",
|
8 |
+
"model.layers.0.input_layernorm.weight": "model-00001-of-00061.safetensors",
|
9 |
+
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00061.safetensors",
|
10 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00061.safetensors",
|
11 |
+
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00061.safetensors",
|
12 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00061.safetensors",
|
13 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00061.safetensors",
|
14 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00061.safetensors",
|
15 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00061.safetensors",
|
16 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00061.safetensors",
|
17 |
+
"model.layers.1.input_layernorm.weight": "model-00002-of-00061.safetensors",
|
18 |
+
"model.layers.1.mlp.down_proj.weight": "model-00002-of-00061.safetensors",
|
19 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00002-of-00061.safetensors",
|
20 |
+
"model.layers.1.mlp.up_proj.weight": "model-00002-of-00061.safetensors",
|
21 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00002-of-00061.safetensors",
|
22 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00061.safetensors",
|
23 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00002-of-00061.safetensors",
|
24 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00061.safetensors",
|
25 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00061.safetensors",
|
26 |
+
"model.layers.10.input_layernorm.weight": "model-00009-of-00061.safetensors",
|
27 |
+
"model.layers.10.mlp.down_proj.weight": "model-00009-of-00061.safetensors",
|
28 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00008-of-00061.safetensors",
|
29 |
+
"model.layers.10.mlp.up_proj.weight": "model-00009-of-00061.safetensors",
|
30 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00009-of-00061.safetensors",
|
31 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00008-of-00061.safetensors",
|
32 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00008-of-00061.safetensors",
|
33 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00008-of-00061.safetensors",
|
34 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00008-of-00061.safetensors",
|
35 |
+
"model.layers.11.input_layernorm.weight": "model-00010-of-00061.safetensors",
|
36 |
+
"model.layers.11.mlp.down_proj.weight": "model-00010-of-00061.safetensors",
|
37 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00009-of-00061.safetensors",
|
38 |
+
"model.layers.11.mlp.up_proj.weight": "model-00009-of-00061.safetensors",
|
39 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00010-of-00061.safetensors",
|
40 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00009-of-00061.safetensors",
|
41 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00009-of-00061.safetensors",
|
42 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00009-of-00061.safetensors",
|
43 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00009-of-00061.safetensors",
|
44 |
+
"model.layers.12.input_layernorm.weight": "model-00010-of-00061.safetensors",
|
45 |
+
"model.layers.12.mlp.down_proj.weight": "model-00010-of-00061.safetensors",
|
46 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00010-of-00061.safetensors",
|
47 |
+
"model.layers.12.mlp.up_proj.weight": "model-00010-of-00061.safetensors",
|
48 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00010-of-00061.safetensors",
|
49 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00010-of-00061.safetensors",
|
50 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00010-of-00061.safetensors",
|
51 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00010-of-00061.safetensors",
|
52 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00010-of-00061.safetensors",
|
53 |
+
"model.layers.13.input_layernorm.weight": "model-00011-of-00061.safetensors",
|
54 |
+
"model.layers.13.mlp.down_proj.weight": "model-00011-of-00061.safetensors",
|
55 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00011-of-00061.safetensors",
|
56 |
+
"model.layers.13.mlp.up_proj.weight": "model-00011-of-00061.safetensors",
|
57 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00011-of-00061.safetensors",
|
58 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00010-of-00061.safetensors",
|
59 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00010-of-00061.safetensors",
|
60 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00010-of-00061.safetensors",
|
61 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00010-of-00061.safetensors",
|
62 |
+
"model.layers.14.input_layernorm.weight": "model-00012-of-00061.safetensors",
|
63 |
+
"model.layers.14.mlp.down_proj.weight": "model-00012-of-00061.safetensors",
|
64 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00011-of-00061.safetensors",
|
65 |
+
"model.layers.14.mlp.up_proj.weight": "model-00012-of-00061.safetensors",
|
66 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00012-of-00061.safetensors",
|
67 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00011-of-00061.safetensors",
|
68 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00011-of-00061.safetensors",
|
69 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00011-of-00061.safetensors",
|
70 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00011-of-00061.safetensors",
|
71 |
+
"model.layers.15.input_layernorm.weight": "model-00013-of-00061.safetensors",
|
72 |
+
"model.layers.15.mlp.down_proj.weight": "model-00013-of-00061.safetensors",
|
73 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00012-of-00061.safetensors",
|
74 |
+
"model.layers.15.mlp.up_proj.weight": "model-00012-of-00061.safetensors",
|
75 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00013-of-00061.safetensors",
|
76 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00012-of-00061.safetensors",
|
77 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00012-of-00061.safetensors",
|
78 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00012-of-00061.safetensors",
|
79 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00012-of-00061.safetensors",
|
80 |
+
"model.layers.16.input_layernorm.weight": "model-00013-of-00061.safetensors",
|
81 |
+
"model.layers.16.mlp.down_proj.weight": "model-00013-of-00061.safetensors",
|
82 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00013-of-00061.safetensors",
|
83 |
+
"model.layers.16.mlp.up_proj.weight": "model-00013-of-00061.safetensors",
|
84 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00013-of-00061.safetensors",
|
85 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00013-of-00061.safetensors",
|
86 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00013-of-00061.safetensors",
|
87 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00013-of-00061.safetensors",
|
88 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00013-of-00061.safetensors",
|
89 |
+
"model.layers.17.input_layernorm.weight": "model-00014-of-00061.safetensors",
|
90 |
+
"model.layers.17.mlp.down_proj.weight": "model-00014-of-00061.safetensors",
|
91 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00014-of-00061.safetensors",
|
92 |
+
"model.layers.17.mlp.up_proj.weight": "model-00014-of-00061.safetensors",
|
93 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00014-of-00061.safetensors",
|
94 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00013-of-00061.safetensors",
|
95 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00013-of-00061.safetensors",
|
96 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00013-of-00061.safetensors",
|
97 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00013-of-00061.safetensors",
|
98 |
+
"model.layers.18.input_layernorm.weight": "model-00015-of-00061.safetensors",
|
99 |
+
"model.layers.18.mlp.down_proj.weight": "model-00015-of-00061.safetensors",
|
100 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00014-of-00061.safetensors",
|
101 |
+
"model.layers.18.mlp.up_proj.weight": "model-00015-of-00061.safetensors",
|
102 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00015-of-00061.safetensors",
|
103 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00014-of-00061.safetensors",
|
104 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00014-of-00061.safetensors",
|
105 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00014-of-00061.safetensors",
|
106 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00014-of-00061.safetensors",
|
107 |
+
"model.layers.19.input_layernorm.weight": "model-00016-of-00061.safetensors",
|
108 |
+
"model.layers.19.mlp.down_proj.weight": "model-00016-of-00061.safetensors",
|
109 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00015-of-00061.safetensors",
|
110 |
+
"model.layers.19.mlp.up_proj.weight": "model-00015-of-00061.safetensors",
|
111 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00016-of-00061.safetensors",
|
112 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00015-of-00061.safetensors",
|
113 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00015-of-00061.safetensors",
|
114 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00015-of-00061.safetensors",
|
115 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00015-of-00061.safetensors",
|
116 |
+
"model.layers.2.input_layernorm.weight": "model-00003-of-00061.safetensors",
|
117 |
+
"model.layers.2.mlp.down_proj.weight": "model-00003-of-00061.safetensors",
|
118 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00002-of-00061.safetensors",
|
119 |
+
"model.layers.2.mlp.up_proj.weight": "model-00003-of-00061.safetensors",
|
120 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00003-of-00061.safetensors",
|
121 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00002-of-00061.safetensors",
|
122 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00002-of-00061.safetensors",
|
123 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00002-of-00061.safetensors",
|
124 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00002-of-00061.safetensors",
|
125 |
+
"model.layers.20.input_layernorm.weight": "model-00016-of-00061.safetensors",
|
126 |
+
"model.layers.20.mlp.down_proj.weight": "model-00016-of-00061.safetensors",
|
127 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00016-of-00061.safetensors",
|
128 |
+
"model.layers.20.mlp.up_proj.weight": "model-00016-of-00061.safetensors",
|
129 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00016-of-00061.safetensors",
|
130 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00016-of-00061.safetensors",
|
131 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00016-of-00061.safetensors",
|
132 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00016-of-00061.safetensors",
|
133 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00016-of-00061.safetensors",
|
134 |
+
"model.layers.21.input_layernorm.weight": "model-00017-of-00061.safetensors",
|
135 |
+
"model.layers.21.mlp.down_proj.weight": "model-00017-of-00061.safetensors",
|
136 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00017-of-00061.safetensors",
|
137 |
+
"model.layers.21.mlp.up_proj.weight": "model-00017-of-00061.safetensors",
|
138 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00017-of-00061.safetensors",
|
139 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00016-of-00061.safetensors",
|
140 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00016-of-00061.safetensors",
|
141 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00016-of-00061.safetensors",
|
142 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00016-of-00061.safetensors",
|
143 |
+
"model.layers.22.input_layernorm.weight": "model-00018-of-00061.safetensors",
|
144 |
+
"model.layers.22.mlp.down_proj.weight": "model-00018-of-00061.safetensors",
|
145 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00017-of-00061.safetensors",
|
146 |
+
"model.layers.22.mlp.up_proj.weight": "model-00018-of-00061.safetensors",
|
147 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00018-of-00061.safetensors",
|
148 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00017-of-00061.safetensors",
|
149 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00017-of-00061.safetensors",
|
150 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00017-of-00061.safetensors",
|
151 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00017-of-00061.safetensors",
|
152 |
+
"model.layers.23.input_layernorm.weight": "model-00019-of-00061.safetensors",
|
153 |
+
"model.layers.23.mlp.down_proj.weight": "model-00019-of-00061.safetensors",
|
154 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00018-of-00061.safetensors",
|
155 |
+
"model.layers.23.mlp.up_proj.weight": "model-00018-of-00061.safetensors",
|
156 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00019-of-00061.safetensors",
|
157 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00018-of-00061.safetensors",
|
158 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00018-of-00061.safetensors",
|
159 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00018-of-00061.safetensors",
|
160 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00018-of-00061.safetensors",
|
161 |
+
"model.layers.24.input_layernorm.weight": "model-00019-of-00061.safetensors",
|
162 |
+
"model.layers.24.mlp.down_proj.weight": "model-00019-of-00061.safetensors",
|
163 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00019-of-00061.safetensors",
|
164 |
+
"model.layers.24.mlp.up_proj.weight": "model-00019-of-00061.safetensors",
|
165 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00019-of-00061.safetensors",
|
166 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00019-of-00061.safetensors",
|
167 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00019-of-00061.safetensors",
|
168 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00019-of-00061.safetensors",
|
169 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00019-of-00061.safetensors",
|
170 |
+
"model.layers.25.input_layernorm.weight": "model-00020-of-00061.safetensors",
|
171 |
+
"model.layers.25.mlp.down_proj.weight": "model-00020-of-00061.safetensors",
|
172 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00020-of-00061.safetensors",
|
173 |
+
"model.layers.25.mlp.up_proj.weight": "model-00020-of-00061.safetensors",
|
174 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00020-of-00061.safetensors",
|
175 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00019-of-00061.safetensors",
|
176 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00019-of-00061.safetensors",
|
177 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00019-of-00061.safetensors",
|
178 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00019-of-00061.safetensors",
|
179 |
+
"model.layers.26.input_layernorm.weight": "model-00021-of-00061.safetensors",
|
180 |
+
"model.layers.26.mlp.down_proj.weight": "model-00021-of-00061.safetensors",
|
181 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00020-of-00061.safetensors",
|
182 |
+
"model.layers.26.mlp.up_proj.weight": "model-00021-of-00061.safetensors",
|
183 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00021-of-00061.safetensors",
|
184 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00020-of-00061.safetensors",
|
185 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00020-of-00061.safetensors",
|
186 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00020-of-00061.safetensors",
|
187 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00020-of-00061.safetensors",
|
188 |
+
"model.layers.27.input_layernorm.weight": "model-00022-of-00061.safetensors",
|
189 |
+
"model.layers.27.mlp.down_proj.weight": "model-00022-of-00061.safetensors",
|
190 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00021-of-00061.safetensors",
|
191 |
+
"model.layers.27.mlp.up_proj.weight": "model-00021-of-00061.safetensors",
|
192 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00022-of-00061.safetensors",
|
193 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00021-of-00061.safetensors",
|
194 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00021-of-00061.safetensors",
|
195 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00021-of-00061.safetensors",
|
196 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00021-of-00061.safetensors",
|
197 |
+
"model.layers.28.input_layernorm.weight": "model-00022-of-00061.safetensors",
|
198 |
+
"model.layers.28.mlp.down_proj.weight": "model-00022-of-00061.safetensors",
|
199 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00022-of-00061.safetensors",
|
200 |
+
"model.layers.28.mlp.up_proj.weight": "model-00022-of-00061.safetensors",
|
201 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00022-of-00061.safetensors",
|
202 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00022-of-00061.safetensors",
|
203 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00022-of-00061.safetensors",
|
204 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00022-of-00061.safetensors",
|
205 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00022-of-00061.safetensors",
|
206 |
+
"model.layers.29.input_layernorm.weight": "model-00023-of-00061.safetensors",
|
207 |
+
"model.layers.29.mlp.down_proj.weight": "model-00023-of-00061.safetensors",
|
208 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00023-of-00061.safetensors",
|
209 |
+
"model.layers.29.mlp.up_proj.weight": "model-00023-of-00061.safetensors",
|
210 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00023-of-00061.safetensors",
|
211 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00022-of-00061.safetensors",
|
212 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00022-of-00061.safetensors",
|
213 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00022-of-00061.safetensors",
|
214 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00022-of-00061.safetensors",
|
215 |
+
"model.layers.3.input_layernorm.weight": "model-00004-of-00061.safetensors",
|
216 |
+
"model.layers.3.mlp.down_proj.weight": "model-00004-of-00061.safetensors",
|
217 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00003-of-00061.safetensors",
|
218 |
+
"model.layers.3.mlp.up_proj.weight": "model-00003-of-00061.safetensors",
|
219 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00004-of-00061.safetensors",
|
220 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00003-of-00061.safetensors",
|
221 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00003-of-00061.safetensors",
|
222 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00003-of-00061.safetensors",
|
223 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00003-of-00061.safetensors",
|
224 |
+
"model.layers.30.input_layernorm.weight": "model-00024-of-00061.safetensors",
|
225 |
+
"model.layers.30.mlp.down_proj.weight": "model-00024-of-00061.safetensors",
|
226 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00023-of-00061.safetensors",
|
227 |
+
"model.layers.30.mlp.up_proj.weight": "model-00024-of-00061.safetensors",
|
228 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00024-of-00061.safetensors",
|
229 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00023-of-00061.safetensors",
|
230 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00023-of-00061.safetensors",
|
231 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00023-of-00061.safetensors",
|
232 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00023-of-00061.safetensors",
|
233 |
+
"model.layers.31.input_layernorm.weight": "model-00025-of-00061.safetensors",
|
234 |
+
"model.layers.31.mlp.down_proj.weight": "model-00025-of-00061.safetensors",
|
235 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00024-of-00061.safetensors",
|
236 |
+
"model.layers.31.mlp.up_proj.weight": "model-00024-of-00061.safetensors",
|
237 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00025-of-00061.safetensors",
|
238 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00024-of-00061.safetensors",
|
239 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00024-of-00061.safetensors",
|
240 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00024-of-00061.safetensors",
|
241 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00024-of-00061.safetensors",
|
242 |
+
"model.layers.32.input_layernorm.weight": "model-00025-of-00061.safetensors",
|
243 |
+
"model.layers.32.mlp.down_proj.weight": "model-00025-of-00061.safetensors",
|
244 |
+
"model.layers.32.mlp.gate_proj.weight": "model-00025-of-00061.safetensors",
|
245 |
+
"model.layers.32.mlp.up_proj.weight": "model-00025-of-00061.safetensors",
|
246 |
+
"model.layers.32.post_attention_layernorm.weight": "model-00025-of-00061.safetensors",
|
247 |
+
"model.layers.32.self_attn.k_proj.weight": "model-00025-of-00061.safetensors",
|
248 |
+
"model.layers.32.self_attn.o_proj.weight": "model-00025-of-00061.safetensors",
|
249 |
+
"model.layers.32.self_attn.q_proj.weight": "model-00025-of-00061.safetensors",
|
250 |
+
"model.layers.32.self_attn.v_proj.weight": "model-00025-of-00061.safetensors",
|
251 |
+
"model.layers.33.input_layernorm.weight": "model-00026-of-00061.safetensors",
|
252 |
+
"model.layers.33.mlp.down_proj.weight": "model-00026-of-00061.safetensors",
|
253 |
+
"model.layers.33.mlp.gate_proj.weight": "model-00026-of-00061.safetensors",
|
254 |
+
"model.layers.33.mlp.up_proj.weight": "model-00026-of-00061.safetensors",
|
255 |
+
"model.layers.33.post_attention_layernorm.weight": "model-00026-of-00061.safetensors",
|
256 |
+
"model.layers.33.self_attn.k_proj.weight": "model-00025-of-00061.safetensors",
|
257 |
+
"model.layers.33.self_attn.o_proj.weight": "model-00025-of-00061.safetensors",
|
258 |
+
"model.layers.33.self_attn.q_proj.weight": "model-00025-of-00061.safetensors",
|
259 |
+
"model.layers.33.self_attn.v_proj.weight": "model-00025-of-00061.safetensors",
|
260 |
+
"model.layers.34.input_layernorm.weight": "model-00027-of-00061.safetensors",
|
261 |
+
"model.layers.34.mlp.down_proj.weight": "model-00027-of-00061.safetensors",
|
262 |
+
"model.layers.34.mlp.gate_proj.weight": "model-00026-of-00061.safetensors",
|
263 |
+
"model.layers.34.mlp.up_proj.weight": "model-00027-of-00061.safetensors",
|
264 |
+
"model.layers.34.post_attention_layernorm.weight": "model-00027-of-00061.safetensors",
|
265 |
+
"model.layers.34.self_attn.k_proj.weight": "model-00026-of-00061.safetensors",
|
266 |
+
"model.layers.34.self_attn.o_proj.weight": "model-00026-of-00061.safetensors",
|
267 |
+
"model.layers.34.self_attn.q_proj.weight": "model-00026-of-00061.safetensors",
|
268 |
+
"model.layers.34.self_attn.v_proj.weight": "model-00026-of-00061.safetensors",
|
269 |
+
"model.layers.35.input_layernorm.weight": "model-00028-of-00061.safetensors",
|
270 |
+
"model.layers.35.mlp.down_proj.weight": "model-00028-of-00061.safetensors",
|
271 |
+
"model.layers.35.mlp.gate_proj.weight": "model-00027-of-00061.safetensors",
|
272 |
+
"model.layers.35.mlp.up_proj.weight": "model-00027-of-00061.safetensors",
|
273 |
+
"model.layers.35.post_attention_layernorm.weight": "model-00028-of-00061.safetensors",
|
274 |
+
"model.layers.35.self_attn.k_proj.weight": "model-00027-of-00061.safetensors",
|
275 |
+
"model.layers.35.self_attn.o_proj.weight": "model-00027-of-00061.safetensors",
|
276 |
+
"model.layers.35.self_attn.q_proj.weight": "model-00027-of-00061.safetensors",
|
277 |
+
"model.layers.35.self_attn.v_proj.weight": "model-00027-of-00061.safetensors",
|
278 |
+
"model.layers.36.input_layernorm.weight": "model-00028-of-00061.safetensors",
|
279 |
+
"model.layers.36.mlp.down_proj.weight": "model-00028-of-00061.safetensors",
|
280 |
+
"model.layers.36.mlp.gate_proj.weight": "model-00028-of-00061.safetensors",
|
281 |
+
"model.layers.36.mlp.up_proj.weight": "model-00028-of-00061.safetensors",
|
282 |
+
"model.layers.36.post_attention_layernorm.weight": "model-00028-of-00061.safetensors",
|
283 |
+
"model.layers.36.self_attn.k_proj.weight": "model-00028-of-00061.safetensors",
|
284 |
+
"model.layers.36.self_attn.o_proj.weight": "model-00028-of-00061.safetensors",
|
285 |
+
"model.layers.36.self_attn.q_proj.weight": "model-00028-of-00061.safetensors",
|
286 |
+
"model.layers.36.self_attn.v_proj.weight": "model-00028-of-00061.safetensors",
|
287 |
+
"model.layers.37.input_layernorm.weight": "model-00029-of-00061.safetensors",
|
288 |
+
"model.layers.37.mlp.down_proj.weight": "model-00029-of-00061.safetensors",
|
289 |
+
"model.layers.37.mlp.gate_proj.weight": "model-00029-of-00061.safetensors",
|
290 |
+
"model.layers.37.mlp.up_proj.weight": "model-00029-of-00061.safetensors",
|
291 |
+
"model.layers.37.post_attention_layernorm.weight": "model-00029-of-00061.safetensors",
|
292 |
+
"model.layers.37.self_attn.k_proj.weight": "model-00028-of-00061.safetensors",
|
293 |
+
"model.layers.37.self_attn.o_proj.weight": "model-00028-of-00061.safetensors",
|
294 |
+
"model.layers.37.self_attn.q_proj.weight": "model-00028-of-00061.safetensors",
|
295 |
+
"model.layers.37.self_attn.v_proj.weight": "model-00028-of-00061.safetensors",
|
296 |
+
"model.layers.38.input_layernorm.weight": "model-00030-of-00061.safetensors",
|
297 |
+
"model.layers.38.mlp.down_proj.weight": "model-00030-of-00061.safetensors",
|
298 |
+
"model.layers.38.mlp.gate_proj.weight": "model-00029-of-00061.safetensors",
|
299 |
+
"model.layers.38.mlp.up_proj.weight": "model-00030-of-00061.safetensors",
|
300 |
+
"model.layers.38.post_attention_layernorm.weight": "model-00030-of-00061.safetensors",
|
301 |
+
"model.layers.38.self_attn.k_proj.weight": "model-00029-of-00061.safetensors",
|
302 |
+
"model.layers.38.self_attn.o_proj.weight": "model-00029-of-00061.safetensors",
|
303 |
+
"model.layers.38.self_attn.q_proj.weight": "model-00029-of-00061.safetensors",
|
304 |
+
"model.layers.38.self_attn.v_proj.weight": "model-00029-of-00061.safetensors",
|
305 |
+
"model.layers.39.input_layernorm.weight": "model-00031-of-00061.safetensors",
|
306 |
+
"model.layers.39.mlp.down_proj.weight": "model-00031-of-00061.safetensors",
|
307 |
+
"model.layers.39.mlp.gate_proj.weight": "model-00030-of-00061.safetensors",
|
308 |
+
"model.layers.39.mlp.up_proj.weight": "model-00030-of-00061.safetensors",
|
309 |
+
"model.layers.39.post_attention_layernorm.weight": "model-00031-of-00061.safetensors",
|
310 |
+
"model.layers.39.self_attn.k_proj.weight": "model-00030-of-00061.safetensors",
|
311 |
+
"model.layers.39.self_attn.o_proj.weight": "model-00030-of-00061.safetensors",
|
312 |
+
"model.layers.39.self_attn.q_proj.weight": "model-00030-of-00061.safetensors",
|
313 |
+
"model.layers.39.self_attn.v_proj.weight": "model-00030-of-00061.safetensors",
|
314 |
+
"model.layers.4.input_layernorm.weight": "model-00004-of-00061.safetensors",
|
315 |
+
"model.layers.4.mlp.down_proj.weight": "model-00004-of-00061.safetensors",
|
316 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00004-of-00061.safetensors",
|
317 |
+
"model.layers.4.mlp.up_proj.weight": "model-00004-of-00061.safetensors",
|
318 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00004-of-00061.safetensors",
|
319 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00004-of-00061.safetensors",
|
320 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00004-of-00061.safetensors",
|
321 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00004-of-00061.safetensors",
|
322 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00004-of-00061.safetensors",
|
323 |
+
"model.layers.40.input_layernorm.weight": "model-00031-of-00061.safetensors",
|
324 |
+
"model.layers.40.mlp.down_proj.weight": "model-00031-of-00061.safetensors",
|
325 |
+
"model.layers.40.mlp.gate_proj.weight": "model-00031-of-00061.safetensors",
|
326 |
+
"model.layers.40.mlp.up_proj.weight": "model-00031-of-00061.safetensors",
|
327 |
+
"model.layers.40.post_attention_layernorm.weight": "model-00031-of-00061.safetensors",
|
328 |
+
"model.layers.40.self_attn.k_proj.weight": "model-00031-of-00061.safetensors",
|
329 |
+
"model.layers.40.self_attn.o_proj.weight": "model-00031-of-00061.safetensors",
|
330 |
+
"model.layers.40.self_attn.q_proj.weight": "model-00031-of-00061.safetensors",
|
331 |
+
"model.layers.40.self_attn.v_proj.weight": "model-00031-of-00061.safetensors",
|
332 |
+
"model.layers.41.input_layernorm.weight": "model-00032-of-00061.safetensors",
|
333 |
+
"model.layers.41.mlp.down_proj.weight": "model-00032-of-00061.safetensors",
|
334 |
+
"model.layers.41.mlp.gate_proj.weight": "model-00032-of-00061.safetensors",
|
335 |
+
"model.layers.41.mlp.up_proj.weight": "model-00032-of-00061.safetensors",
|
336 |
+
"model.layers.41.post_attention_layernorm.weight": "model-00032-of-00061.safetensors",
|
337 |
+
"model.layers.41.self_attn.k_proj.weight": "model-00031-of-00061.safetensors",
|
338 |
+
"model.layers.41.self_attn.o_proj.weight": "model-00031-of-00061.safetensors",
|
339 |
+
"model.layers.41.self_attn.q_proj.weight": "model-00031-of-00061.safetensors",
|
340 |
+
"model.layers.41.self_attn.v_proj.weight": "model-00031-of-00061.safetensors",
|
341 |
+
"model.layers.42.input_layernorm.weight": "model-00033-of-00061.safetensors",
|
342 |
+
"model.layers.42.mlp.down_proj.weight": "model-00033-of-00061.safetensors",
|
343 |
+
"model.layers.42.mlp.gate_proj.weight": "model-00032-of-00061.safetensors",
|
344 |
+
"model.layers.42.mlp.up_proj.weight": "model-00033-of-00061.safetensors",
|
345 |
+
"model.layers.42.post_attention_layernorm.weight": "model-00033-of-00061.safetensors",
|
346 |
+
"model.layers.42.self_attn.k_proj.weight": "model-00032-of-00061.safetensors",
|
347 |
+
"model.layers.42.self_attn.o_proj.weight": "model-00032-of-00061.safetensors",
|
348 |
+
"model.layers.42.self_attn.q_proj.weight": "model-00032-of-00061.safetensors",
|
349 |
+
"model.layers.42.self_attn.v_proj.weight": "model-00032-of-00061.safetensors",
|
350 |
+
"model.layers.43.input_layernorm.weight": "model-00034-of-00061.safetensors",
|
351 |
+
"model.layers.43.mlp.down_proj.weight": "model-00034-of-00061.safetensors",
|
352 |
+
"model.layers.43.mlp.gate_proj.weight": "model-00033-of-00061.safetensors",
|
353 |
+
"model.layers.43.mlp.up_proj.weight": "model-00033-of-00061.safetensors",
|
354 |
+
"model.layers.43.post_attention_layernorm.weight": "model-00034-of-00061.safetensors",
|
355 |
+
"model.layers.43.self_attn.k_proj.weight": "model-00033-of-00061.safetensors",
|
356 |
+
"model.layers.43.self_attn.o_proj.weight": "model-00033-of-00061.safetensors",
|
357 |
+
"model.layers.43.self_attn.q_proj.weight": "model-00033-of-00061.safetensors",
|
358 |
+
"model.layers.43.self_attn.v_proj.weight": "model-00033-of-00061.safetensors",
|
359 |
+
"model.layers.44.input_layernorm.weight": "model-00034-of-00061.safetensors",
|
360 |
+
"model.layers.44.mlp.down_proj.weight": "model-00034-of-00061.safetensors",
|
361 |
+
"model.layers.44.mlp.gate_proj.weight": "model-00034-of-00061.safetensors",
|
362 |
+
"model.layers.44.mlp.up_proj.weight": "model-00034-of-00061.safetensors",
|
363 |
+
"model.layers.44.post_attention_layernorm.weight": "model-00034-of-00061.safetensors",
|
364 |
+
"model.layers.44.self_attn.k_proj.weight": "model-00034-of-00061.safetensors",
|
365 |
+
"model.layers.44.self_attn.o_proj.weight": "model-00034-of-00061.safetensors",
|
366 |
+
"model.layers.44.self_attn.q_proj.weight": "model-00034-of-00061.safetensors",
|
367 |
+
"model.layers.44.self_attn.v_proj.weight": "model-00034-of-00061.safetensors",
|
368 |
+
"model.layers.45.input_layernorm.weight": "model-00035-of-00061.safetensors",
|
369 |
+
"model.layers.45.mlp.down_proj.weight": "model-00035-of-00061.safetensors",
|
370 |
+
"model.layers.45.mlp.gate_proj.weight": "model-00035-of-00061.safetensors",
|
371 |
+
"model.layers.45.mlp.up_proj.weight": "model-00035-of-00061.safetensors",
|
372 |
+
"model.layers.45.post_attention_layernorm.weight": "model-00035-of-00061.safetensors",
|
373 |
+
"model.layers.45.self_attn.k_proj.weight": "model-00034-of-00061.safetensors",
|
374 |
+
"model.layers.45.self_attn.o_proj.weight": "model-00034-of-00061.safetensors",
|
375 |
+
"model.layers.45.self_attn.q_proj.weight": "model-00034-of-00061.safetensors",
|
376 |
+
"model.layers.45.self_attn.v_proj.weight": "model-00034-of-00061.safetensors",
|
377 |
+
"model.layers.46.input_layernorm.weight": "model-00036-of-00061.safetensors",
|
378 |
+
"model.layers.46.mlp.down_proj.weight": "model-00036-of-00061.safetensors",
|
379 |
+
"model.layers.46.mlp.gate_proj.weight": "model-00035-of-00061.safetensors",
|
380 |
+
"model.layers.46.mlp.up_proj.weight": "model-00036-of-00061.safetensors",
|
381 |
+
"model.layers.46.post_attention_layernorm.weight": "model-00036-of-00061.safetensors",
|
382 |
+
"model.layers.46.self_attn.k_proj.weight": "model-00035-of-00061.safetensors",
|
383 |
+
"model.layers.46.self_attn.o_proj.weight": "model-00035-of-00061.safetensors",
|
384 |
+
"model.layers.46.self_attn.q_proj.weight": "model-00035-of-00061.safetensors",
|
385 |
+
"model.layers.46.self_attn.v_proj.weight": "model-00035-of-00061.safetensors",
|
386 |
+
"model.layers.47.input_layernorm.weight": "model-00037-of-00061.safetensors",
|
387 |
+
"model.layers.47.mlp.down_proj.weight": "model-00037-of-00061.safetensors",
|
388 |
+
"model.layers.47.mlp.gate_proj.weight": "model-00036-of-00061.safetensors",
|
389 |
+
"model.layers.47.mlp.up_proj.weight": "model-00036-of-00061.safetensors",
|
390 |
+
"model.layers.47.post_attention_layernorm.weight": "model-00037-of-00061.safetensors",
|
391 |
+
"model.layers.47.self_attn.k_proj.weight": "model-00036-of-00061.safetensors",
|
392 |
+
"model.layers.47.self_attn.o_proj.weight": "model-00036-of-00061.safetensors",
|
393 |
+
"model.layers.47.self_attn.q_proj.weight": "model-00036-of-00061.safetensors",
|
394 |
+
"model.layers.47.self_attn.v_proj.weight": "model-00036-of-00061.safetensors",
|
395 |
+
"model.layers.48.input_layernorm.weight": "model-00037-of-00061.safetensors",
|
396 |
+
"model.layers.48.mlp.down_proj.weight": "model-00037-of-00061.safetensors",
|
397 |
+
"model.layers.48.mlp.gate_proj.weight": "model-00037-of-00061.safetensors",
|
398 |
+
"model.layers.48.mlp.up_proj.weight": "model-00037-of-00061.safetensors",
|
399 |
+
"model.layers.48.post_attention_layernorm.weight": "model-00037-of-00061.safetensors",
|
400 |
+
"model.layers.48.self_attn.k_proj.weight": "model-00037-of-00061.safetensors",
|
401 |
+
"model.layers.48.self_attn.o_proj.weight": "model-00037-of-00061.safetensors",
|
402 |
+
"model.layers.48.self_attn.q_proj.weight": "model-00037-of-00061.safetensors",
|
403 |
+
"model.layers.48.self_attn.v_proj.weight": "model-00037-of-00061.safetensors",
|
404 |
+
"model.layers.49.input_layernorm.weight": "model-00038-of-00061.safetensors",
|
405 |
+
"model.layers.49.mlp.down_proj.weight": "model-00038-of-00061.safetensors",
|
406 |
+
"model.layers.49.mlp.gate_proj.weight": "model-00038-of-00061.safetensors",
|
407 |
+
"model.layers.49.mlp.up_proj.weight": "model-00038-of-00061.safetensors",
|
408 |
+
"model.layers.49.post_attention_layernorm.weight": "model-00038-of-00061.safetensors",
|
409 |
+
"model.layers.49.self_attn.k_proj.weight": "model-00037-of-00061.safetensors",
|
410 |
+
"model.layers.49.self_attn.o_proj.weight": "model-00037-of-00061.safetensors",
|
411 |
+
"model.layers.49.self_attn.q_proj.weight": "model-00037-of-00061.safetensors",
|
412 |
+
"model.layers.49.self_attn.v_proj.weight": "model-00037-of-00061.safetensors",
|
413 |
+
"model.layers.5.input_layernorm.weight": "model-00005-of-00061.safetensors",
|
414 |
+
"model.layers.5.mlp.down_proj.weight": "model-00005-of-00061.safetensors",
|
415 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00005-of-00061.safetensors",
|
416 |
+
"model.layers.5.mlp.up_proj.weight": "model-00005-of-00061.safetensors",
|
417 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00005-of-00061.safetensors",
|
418 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00004-of-00061.safetensors",
|
419 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00004-of-00061.safetensors",
|
420 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00004-of-00061.safetensors",
|
421 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00004-of-00061.safetensors",
|
422 |
+
"model.layers.50.input_layernorm.weight": "model-00039-of-00061.safetensors",
|
423 |
+
"model.layers.50.mlp.down_proj.weight": "model-00039-of-00061.safetensors",
|
424 |
+
"model.layers.50.mlp.gate_proj.weight": "model-00038-of-00061.safetensors",
|
425 |
+
"model.layers.50.mlp.up_proj.weight": "model-00039-of-00061.safetensors",
|
426 |
+
"model.layers.50.post_attention_layernorm.weight": "model-00039-of-00061.safetensors",
|
427 |
+
"model.layers.50.self_attn.k_proj.weight": "model-00038-of-00061.safetensors",
|
428 |
+
"model.layers.50.self_attn.o_proj.weight": "model-00038-of-00061.safetensors",
|
429 |
+
"model.layers.50.self_attn.q_proj.weight": "model-00038-of-00061.safetensors",
|
430 |
+
"model.layers.50.self_attn.v_proj.weight": "model-00038-of-00061.safetensors",
|
431 |
+
"model.layers.51.input_layernorm.weight": "model-00040-of-00061.safetensors",
|
432 |
+
"model.layers.51.mlp.down_proj.weight": "model-00040-of-00061.safetensors",
|
433 |
+
"model.layers.51.mlp.gate_proj.weight": "model-00039-of-00061.safetensors",
|
434 |
+
"model.layers.51.mlp.up_proj.weight": "model-00039-of-00061.safetensors",
|
435 |
+
"model.layers.51.post_attention_layernorm.weight": "model-00040-of-00061.safetensors",
|
436 |
+
"model.layers.51.self_attn.k_proj.weight": "model-00039-of-00061.safetensors",
|
437 |
+
"model.layers.51.self_attn.o_proj.weight": "model-00039-of-00061.safetensors",
|
438 |
+
"model.layers.51.self_attn.q_proj.weight": "model-00039-of-00061.safetensors",
|
439 |
+
"model.layers.51.self_attn.v_proj.weight": "model-00039-of-00061.safetensors",
|
440 |
+
"model.layers.52.input_layernorm.weight": "model-00040-of-00061.safetensors",
|
441 |
+
"model.layers.52.mlp.down_proj.weight": "model-00040-of-00061.safetensors",
|
442 |
+
"model.layers.52.mlp.gate_proj.weight": "model-00040-of-00061.safetensors",
|
443 |
+
"model.layers.52.mlp.up_proj.weight": "model-00040-of-00061.safetensors",
|
444 |
+
"model.layers.52.post_attention_layernorm.weight": "model-00040-of-00061.safetensors",
|
445 |
+
"model.layers.52.self_attn.k_proj.weight": "model-00040-of-00061.safetensors",
|
446 |
+
"model.layers.52.self_attn.o_proj.weight": "model-00040-of-00061.safetensors",
|
447 |
+
"model.layers.52.self_attn.q_proj.weight": "model-00040-of-00061.safetensors",
|
448 |
+
"model.layers.52.self_attn.v_proj.weight": "model-00040-of-00061.safetensors",
|
449 |
+
"model.layers.53.input_layernorm.weight": "model-00041-of-00061.safetensors",
|
450 |
+
"model.layers.53.mlp.down_proj.weight": "model-00041-of-00061.safetensors",
|
451 |
+
"model.layers.53.mlp.gate_proj.weight": "model-00041-of-00061.safetensors",
|
452 |
+
"model.layers.53.mlp.up_proj.weight": "model-00041-of-00061.safetensors",
|
453 |
+
"model.layers.53.post_attention_layernorm.weight": "model-00041-of-00061.safetensors",
|
454 |
+
"model.layers.53.self_attn.k_proj.weight": "model-00040-of-00061.safetensors",
|
455 |
+
"model.layers.53.self_attn.o_proj.weight": "model-00040-of-00061.safetensors",
|
456 |
+
"model.layers.53.self_attn.q_proj.weight": "model-00040-of-00061.safetensors",
|
457 |
+
"model.layers.53.self_attn.v_proj.weight": "model-00040-of-00061.safetensors",
|
458 |
+
"model.layers.54.input_layernorm.weight": "model-00042-of-00061.safetensors",
|
459 |
+
"model.layers.54.mlp.down_proj.weight": "model-00042-of-00061.safetensors",
|
460 |
+
"model.layers.54.mlp.gate_proj.weight": "model-00041-of-00061.safetensors",
|
461 |
+
"model.layers.54.mlp.up_proj.weight": "model-00042-of-00061.safetensors",
|
462 |
+
"model.layers.54.post_attention_layernorm.weight": "model-00042-of-00061.safetensors",
|
463 |
+
"model.layers.54.self_attn.k_proj.weight": "model-00041-of-00061.safetensors",
|
464 |
+
"model.layers.54.self_attn.o_proj.weight": "model-00041-of-00061.safetensors",
|
465 |
+
"model.layers.54.self_attn.q_proj.weight": "model-00041-of-00061.safetensors",
|
466 |
+
"model.layers.54.self_attn.v_proj.weight": "model-00041-of-00061.safetensors",
|
467 |
+
"model.layers.55.input_layernorm.weight": "model-00043-of-00061.safetensors",
|
468 |
+
"model.layers.55.mlp.down_proj.weight": "model-00043-of-00061.safetensors",
|
469 |
+
"model.layers.55.mlp.gate_proj.weight": "model-00042-of-00061.safetensors",
|
470 |
+
"model.layers.55.mlp.up_proj.weight": "model-00042-of-00061.safetensors",
|
471 |
+
"model.layers.55.post_attention_layernorm.weight": "model-00043-of-00061.safetensors",
|
472 |
+
"model.layers.55.self_attn.k_proj.weight": "model-00042-of-00061.safetensors",
|
473 |
+
"model.layers.55.self_attn.o_proj.weight": "model-00042-of-00061.safetensors",
|
474 |
+
"model.layers.55.self_attn.q_proj.weight": "model-00042-of-00061.safetensors",
|
475 |
+
"model.layers.55.self_attn.v_proj.weight": "model-00042-of-00061.safetensors",
|
476 |
+
"model.layers.56.input_layernorm.weight": "model-00043-of-00061.safetensors",
|
477 |
+
"model.layers.56.mlp.down_proj.weight": "model-00043-of-00061.safetensors",
|
478 |
+
"model.layers.56.mlp.gate_proj.weight": "model-00043-of-00061.safetensors",
|
479 |
+
"model.layers.56.mlp.up_proj.weight": "model-00043-of-00061.safetensors",
|
480 |
+
"model.layers.56.post_attention_layernorm.weight": "model-00043-of-00061.safetensors",
|
481 |
+
"model.layers.56.self_attn.k_proj.weight": "model-00043-of-00061.safetensors",
|
482 |
+
"model.layers.56.self_attn.o_proj.weight": "model-00043-of-00061.safetensors",
|
483 |
+
"model.layers.56.self_attn.q_proj.weight": "model-00043-of-00061.safetensors",
|
484 |
+
"model.layers.56.self_attn.v_proj.weight": "model-00043-of-00061.safetensors",
|
485 |
+
"model.layers.57.input_layernorm.weight": "model-00044-of-00061.safetensors",
|
486 |
+
"model.layers.57.mlp.down_proj.weight": "model-00044-of-00061.safetensors",
|
487 |
+
"model.layers.57.mlp.gate_proj.weight": "model-00044-of-00061.safetensors",
|
488 |
+
"model.layers.57.mlp.up_proj.weight": "model-00044-of-00061.safetensors",
|
489 |
+
"model.layers.57.post_attention_layernorm.weight": "model-00044-of-00061.safetensors",
|
490 |
+
"model.layers.57.self_attn.k_proj.weight": "model-00043-of-00061.safetensors",
|
491 |
+
"model.layers.57.self_attn.o_proj.weight": "model-00043-of-00061.safetensors",
|
492 |
+
"model.layers.57.self_attn.q_proj.weight": "model-00043-of-00061.safetensors",
|
493 |
+
"model.layers.57.self_attn.v_proj.weight": "model-00043-of-00061.safetensors",
|
494 |
+
"model.layers.58.input_layernorm.weight": "model-00045-of-00061.safetensors",
|
495 |
+
"model.layers.58.mlp.down_proj.weight": "model-00045-of-00061.safetensors",
|
496 |
+
"model.layers.58.mlp.gate_proj.weight": "model-00044-of-00061.safetensors",
|
497 |
+
"model.layers.58.mlp.up_proj.weight": "model-00045-of-00061.safetensors",
|
498 |
+
"model.layers.58.post_attention_layernorm.weight": "model-00045-of-00061.safetensors",
|
499 |
+
"model.layers.58.self_attn.k_proj.weight": "model-00044-of-00061.safetensors",
|
500 |
+
"model.layers.58.self_attn.o_proj.weight": "model-00044-of-00061.safetensors",
|
501 |
+
"model.layers.58.self_attn.q_proj.weight": "model-00044-of-00061.safetensors",
|
502 |
+
"model.layers.58.self_attn.v_proj.weight": "model-00044-of-00061.safetensors",
|
503 |
+
"model.layers.59.input_layernorm.weight": "model-00046-of-00061.safetensors",
|
504 |
+
"model.layers.59.mlp.down_proj.weight": "model-00046-of-00061.safetensors",
|
505 |
+
"model.layers.59.mlp.gate_proj.weight": "model-00045-of-00061.safetensors",
|
506 |
+
"model.layers.59.mlp.up_proj.weight": "model-00045-of-00061.safetensors",
|
507 |
+
"model.layers.59.post_attention_layernorm.weight": "model-00046-of-00061.safetensors",
|
508 |
+
"model.layers.59.self_attn.k_proj.weight": "model-00045-of-00061.safetensors",
|
509 |
+
"model.layers.59.self_attn.o_proj.weight": "model-00045-of-00061.safetensors",
|
510 |
+
"model.layers.59.self_attn.q_proj.weight": "model-00045-of-00061.safetensors",
|
511 |
+
"model.layers.59.self_attn.v_proj.weight": "model-00045-of-00061.safetensors",
|
512 |
+
"model.layers.6.input_layernorm.weight": "model-00006-of-00061.safetensors",
|
513 |
+
"model.layers.6.mlp.down_proj.weight": "model-00006-of-00061.safetensors",
|
514 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00005-of-00061.safetensors",
|
515 |
+
"model.layers.6.mlp.up_proj.weight": "model-00006-of-00061.safetensors",
|
516 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00006-of-00061.safetensors",
|
517 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00005-of-00061.safetensors",
|
518 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00005-of-00061.safetensors",
|
519 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00005-of-00061.safetensors",
|
520 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00005-of-00061.safetensors",
|
521 |
+
"model.layers.60.input_layernorm.weight": "model-00046-of-00061.safetensors",
|
522 |
+
"model.layers.60.mlp.down_proj.weight": "model-00046-of-00061.safetensors",
|
523 |
+
"model.layers.60.mlp.gate_proj.weight": "model-00046-of-00061.safetensors",
|
524 |
+
"model.layers.60.mlp.up_proj.weight": "model-00046-of-00061.safetensors",
|
525 |
+
"model.layers.60.post_attention_layernorm.weight": "model-00046-of-00061.safetensors",
|
526 |
+
"model.layers.60.self_attn.k_proj.weight": "model-00046-of-00061.safetensors",
|
527 |
+
"model.layers.60.self_attn.o_proj.weight": "model-00046-of-00061.safetensors",
|
528 |
+
"model.layers.60.self_attn.q_proj.weight": "model-00046-of-00061.safetensors",
|
529 |
+
"model.layers.60.self_attn.v_proj.weight": "model-00046-of-00061.safetensors",
|
530 |
+
"model.layers.61.input_layernorm.weight": "model-00047-of-00061.safetensors",
|
531 |
+
"model.layers.61.mlp.down_proj.weight": "model-00047-of-00061.safetensors",
|
532 |
+
"model.layers.61.mlp.gate_proj.weight": "model-00047-of-00061.safetensors",
|
533 |
+
"model.layers.61.mlp.up_proj.weight": "model-00047-of-00061.safetensors",
|
534 |
+
"model.layers.61.post_attention_layernorm.weight": "model-00047-of-00061.safetensors",
|
535 |
+
"model.layers.61.self_attn.k_proj.weight": "model-00046-of-00061.safetensors",
|
536 |
+
"model.layers.61.self_attn.o_proj.weight": "model-00046-of-00061.safetensors",
|
537 |
+
"model.layers.61.self_attn.q_proj.weight": "model-00046-of-00061.safetensors",
|
538 |
+
"model.layers.61.self_attn.v_proj.weight": "model-00046-of-00061.safetensors",
|
539 |
+
"model.layers.62.input_layernorm.weight": "model-00048-of-00061.safetensors",
|
540 |
+
"model.layers.62.mlp.down_proj.weight": "model-00048-of-00061.safetensors",
|
541 |
+
"model.layers.62.mlp.gate_proj.weight": "model-00047-of-00061.safetensors",
|
542 |
+
"model.layers.62.mlp.up_proj.weight": "model-00048-of-00061.safetensors",
|
543 |
+
"model.layers.62.post_attention_layernorm.weight": "model-00048-of-00061.safetensors",
|
544 |
+
"model.layers.62.self_attn.k_proj.weight": "model-00047-of-00061.safetensors",
|
545 |
+
"model.layers.62.self_attn.o_proj.weight": "model-00047-of-00061.safetensors",
|
546 |
+
"model.layers.62.self_attn.q_proj.weight": "model-00047-of-00061.safetensors",
|
547 |
+
"model.layers.62.self_attn.v_proj.weight": "model-00047-of-00061.safetensors",
|
548 |
+
"model.layers.63.input_layernorm.weight": "model-00049-of-00061.safetensors",
|
549 |
+
"model.layers.63.mlp.down_proj.weight": "model-00049-of-00061.safetensors",
|
550 |
+
"model.layers.63.mlp.gate_proj.weight": "model-00048-of-00061.safetensors",
|
551 |
+
"model.layers.63.mlp.up_proj.weight": "model-00048-of-00061.safetensors",
|
552 |
+
"model.layers.63.post_attention_layernorm.weight": "model-00049-of-00061.safetensors",
|
553 |
+
"model.layers.63.self_attn.k_proj.weight": "model-00048-of-00061.safetensors",
|
554 |
+
"model.layers.63.self_attn.o_proj.weight": "model-00048-of-00061.safetensors",
|
555 |
+
"model.layers.63.self_attn.q_proj.weight": "model-00048-of-00061.safetensors",
|
556 |
+
"model.layers.63.self_attn.v_proj.weight": "model-00048-of-00061.safetensors",
|
557 |
+
"model.layers.64.input_layernorm.weight": "model-00049-of-00061.safetensors",
|
558 |
+
"model.layers.64.mlp.down_proj.weight": "model-00049-of-00061.safetensors",
|
559 |
+
"model.layers.64.mlp.gate_proj.weight": "model-00049-of-00061.safetensors",
|
560 |
+
"model.layers.64.mlp.up_proj.weight": "model-00049-of-00061.safetensors",
|
561 |
+
"model.layers.64.post_attention_layernorm.weight": "model-00049-of-00061.safetensors",
|
562 |
+
"model.layers.64.self_attn.k_proj.weight": "model-00049-of-00061.safetensors",
|
563 |
+
"model.layers.64.self_attn.o_proj.weight": "model-00049-of-00061.safetensors",
|
564 |
+
"model.layers.64.self_attn.q_proj.weight": "model-00049-of-00061.safetensors",
|
565 |
+
"model.layers.64.self_attn.v_proj.weight": "model-00049-of-00061.safetensors",
|
566 |
+
"model.layers.65.input_layernorm.weight": "model-00050-of-00061.safetensors",
|
567 |
+
"model.layers.65.mlp.down_proj.weight": "model-00050-of-00061.safetensors",
|
568 |
+
"model.layers.65.mlp.gate_proj.weight": "model-00050-of-00061.safetensors",
|
569 |
+
"model.layers.65.mlp.up_proj.weight": "model-00050-of-00061.safetensors",
|
570 |
+
"model.layers.65.post_attention_layernorm.weight": "model-00050-of-00061.safetensors",
|
571 |
+
"model.layers.65.self_attn.k_proj.weight": "model-00049-of-00061.safetensors",
|
572 |
+
"model.layers.65.self_attn.o_proj.weight": "model-00049-of-00061.safetensors",
|
573 |
+
"model.layers.65.self_attn.q_proj.weight": "model-00049-of-00061.safetensors",
|
574 |
+
"model.layers.65.self_attn.v_proj.weight": "model-00049-of-00061.safetensors",
|
575 |
+
"model.layers.66.input_layernorm.weight": "model-00051-of-00061.safetensors",
|
576 |
+
"model.layers.66.mlp.down_proj.weight": "model-00051-of-00061.safetensors",
|
577 |
+
"model.layers.66.mlp.gate_proj.weight": "model-00050-of-00061.safetensors",
|
578 |
+
"model.layers.66.mlp.up_proj.weight": "model-00051-of-00061.safetensors",
|
579 |
+
"model.layers.66.post_attention_layernorm.weight": "model-00051-of-00061.safetensors",
|
580 |
+
"model.layers.66.self_attn.k_proj.weight": "model-00050-of-00061.safetensors",
|
581 |
+
"model.layers.66.self_attn.o_proj.weight": "model-00050-of-00061.safetensors",
|
582 |
+
"model.layers.66.self_attn.q_proj.weight": "model-00050-of-00061.safetensors",
|
583 |
+
"model.layers.66.self_attn.v_proj.weight": "model-00050-of-00061.safetensors",
|
584 |
+
"model.layers.67.input_layernorm.weight": "model-00052-of-00061.safetensors",
|
585 |
+
"model.layers.67.mlp.down_proj.weight": "model-00052-of-00061.safetensors",
|
586 |
+
"model.layers.67.mlp.gate_proj.weight": "model-00051-of-00061.safetensors",
|
587 |
+
"model.layers.67.mlp.up_proj.weight": "model-00051-of-00061.safetensors",
|
588 |
+
"model.layers.67.post_attention_layernorm.weight": "model-00052-of-00061.safetensors",
|
589 |
+
"model.layers.67.self_attn.k_proj.weight": "model-00051-of-00061.safetensors",
|
590 |
+
"model.layers.67.self_attn.o_proj.weight": "model-00051-of-00061.safetensors",
|
591 |
+
"model.layers.67.self_attn.q_proj.weight": "model-00051-of-00061.safetensors",
|
592 |
+
"model.layers.67.self_attn.v_proj.weight": "model-00051-of-00061.safetensors",
|
593 |
+
"model.layers.68.input_layernorm.weight": "model-00052-of-00061.safetensors",
|
594 |
+
"model.layers.68.mlp.down_proj.weight": "model-00052-of-00061.safetensors",
|
595 |
+
"model.layers.68.mlp.gate_proj.weight": "model-00052-of-00061.safetensors",
|
596 |
+
"model.layers.68.mlp.up_proj.weight": "model-00052-of-00061.safetensors",
|
597 |
+
"model.layers.68.post_attention_layernorm.weight": "model-00052-of-00061.safetensors",
|
598 |
+
"model.layers.68.self_attn.k_proj.weight": "model-00052-of-00061.safetensors",
|
599 |
+
"model.layers.68.self_attn.o_proj.weight": "model-00052-of-00061.safetensors",
|
600 |
+
"model.layers.68.self_attn.q_proj.weight": "model-00052-of-00061.safetensors",
|
601 |
+
"model.layers.68.self_attn.v_proj.weight": "model-00052-of-00061.safetensors",
|
602 |
+
"model.layers.69.input_layernorm.weight": "model-00053-of-00061.safetensors",
|
603 |
+
"model.layers.69.mlp.down_proj.weight": "model-00053-of-00061.safetensors",
|
604 |
+
"model.layers.69.mlp.gate_proj.weight": "model-00053-of-00061.safetensors",
|
605 |
+
"model.layers.69.mlp.up_proj.weight": "model-00053-of-00061.safetensors",
|
606 |
+
"model.layers.69.post_attention_layernorm.weight": "model-00053-of-00061.safetensors",
|
607 |
+
"model.layers.69.self_attn.k_proj.weight": "model-00052-of-00061.safetensors",
|
608 |
+
"model.layers.69.self_attn.o_proj.weight": "model-00052-of-00061.safetensors",
|
609 |
+
"model.layers.69.self_attn.q_proj.weight": "model-00052-of-00061.safetensors",
|
610 |
+
"model.layers.69.self_attn.v_proj.weight": "model-00052-of-00061.safetensors",
|
611 |
+
"model.layers.7.input_layernorm.weight": "model-00007-of-00061.safetensors",
|
612 |
+
"model.layers.7.mlp.down_proj.weight": "model-00007-of-00061.safetensors",
|
613 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00006-of-00061.safetensors",
|
614 |
+
"model.layers.7.mlp.up_proj.weight": "model-00006-of-00061.safetensors",
|
615 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00007-of-00061.safetensors",
|
616 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00006-of-00061.safetensors",
|
617 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00006-of-00061.safetensors",
|
618 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00006-of-00061.safetensors",
|
619 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00006-of-00061.safetensors",
|
620 |
+
"model.layers.70.input_layernorm.weight": "model-00054-of-00061.safetensors",
|
621 |
+
"model.layers.70.mlp.down_proj.weight": "model-00054-of-00061.safetensors",
|
622 |
+
"model.layers.70.mlp.gate_proj.weight": "model-00053-of-00061.safetensors",
|
623 |
+
"model.layers.70.mlp.up_proj.weight": "model-00054-of-00061.safetensors",
|
624 |
+
"model.layers.70.post_attention_layernorm.weight": "model-00054-of-00061.safetensors",
|
625 |
+
"model.layers.70.self_attn.k_proj.weight": "model-00053-of-00061.safetensors",
|
626 |
+
"model.layers.70.self_attn.o_proj.weight": "model-00053-of-00061.safetensors",
|
627 |
+
"model.layers.70.self_attn.q_proj.weight": "model-00053-of-00061.safetensors",
|
628 |
+
"model.layers.70.self_attn.v_proj.weight": "model-00053-of-00061.safetensors",
|
629 |
+
"model.layers.71.input_layernorm.weight": "model-00055-of-00061.safetensors",
|
630 |
+
"model.layers.71.mlp.down_proj.weight": "model-00055-of-00061.safetensors",
|
631 |
+
"model.layers.71.mlp.gate_proj.weight": "model-00054-of-00061.safetensors",
|
632 |
+
"model.layers.71.mlp.up_proj.weight": "model-00054-of-00061.safetensors",
|
633 |
+
"model.layers.71.post_attention_layernorm.weight": "model-00055-of-00061.safetensors",
|
634 |
+
"model.layers.71.self_attn.k_proj.weight": "model-00054-of-00061.safetensors",
|
635 |
+
"model.layers.71.self_attn.o_proj.weight": "model-00054-of-00061.safetensors",
|
636 |
+
"model.layers.71.self_attn.q_proj.weight": "model-00054-of-00061.safetensors",
|
637 |
+
"model.layers.71.self_attn.v_proj.weight": "model-00054-of-00061.safetensors",
|
638 |
+
"model.layers.72.input_layernorm.weight": "model-00055-of-00061.safetensors",
|
639 |
+
"model.layers.72.mlp.down_proj.weight": "model-00055-of-00061.safetensors",
|
640 |
+
"model.layers.72.mlp.gate_proj.weight": "model-00055-of-00061.safetensors",
|
641 |
+
"model.layers.72.mlp.up_proj.weight": "model-00055-of-00061.safetensors",
|
642 |
+
"model.layers.72.post_attention_layernorm.weight": "model-00055-of-00061.safetensors",
|
643 |
+
"model.layers.72.self_attn.k_proj.weight": "model-00055-of-00061.safetensors",
|
644 |
+
"model.layers.72.self_attn.o_proj.weight": "model-00055-of-00061.safetensors",
|
645 |
+
"model.layers.72.self_attn.q_proj.weight": "model-00055-of-00061.safetensors",
|
646 |
+
"model.layers.72.self_attn.v_proj.weight": "model-00055-of-00061.safetensors",
|
647 |
+
"model.layers.73.input_layernorm.weight": "model-00056-of-00061.safetensors",
|
648 |
+
"model.layers.73.mlp.down_proj.weight": "model-00056-of-00061.safetensors",
|
649 |
+
"model.layers.73.mlp.gate_proj.weight": "model-00056-of-00061.safetensors",
|
650 |
+
"model.layers.73.mlp.up_proj.weight": "model-00056-of-00061.safetensors",
|
651 |
+
"model.layers.73.post_attention_layernorm.weight": "model-00056-of-00061.safetensors",
|
652 |
+
"model.layers.73.self_attn.k_proj.weight": "model-00055-of-00061.safetensors",
|
653 |
+
"model.layers.73.self_attn.o_proj.weight": "model-00055-of-00061.safetensors",
|
654 |
+
"model.layers.73.self_attn.q_proj.weight": "model-00055-of-00061.safetensors",
|
655 |
+
"model.layers.73.self_attn.v_proj.weight": "model-00055-of-00061.safetensors",
|
656 |
+
"model.layers.74.input_layernorm.weight": "model-00057-of-00061.safetensors",
|
657 |
+
"model.layers.74.mlp.down_proj.weight": "model-00057-of-00061.safetensors",
|
658 |
+
"model.layers.74.mlp.gate_proj.weight": "model-00056-of-00061.safetensors",
|
659 |
+
"model.layers.74.mlp.up_proj.weight": "model-00057-of-00061.safetensors",
|
660 |
+
"model.layers.74.post_attention_layernorm.weight": "model-00057-of-00061.safetensors",
|
661 |
+
"model.layers.74.self_attn.k_proj.weight": "model-00056-of-00061.safetensors",
|
662 |
+
"model.layers.74.self_attn.o_proj.weight": "model-00056-of-00061.safetensors",
|
663 |
+
"model.layers.74.self_attn.q_proj.weight": "model-00056-of-00061.safetensors",
|
664 |
+
"model.layers.74.self_attn.v_proj.weight": "model-00056-of-00061.safetensors",
|
665 |
+
"model.layers.75.input_layernorm.weight": "model-00058-of-00061.safetensors",
|
666 |
+
"model.layers.75.mlp.down_proj.weight": "model-00058-of-00061.safetensors",
|
667 |
+
"model.layers.75.mlp.gate_proj.weight": "model-00057-of-00061.safetensors",
|
668 |
+
"model.layers.75.mlp.up_proj.weight": "model-00057-of-00061.safetensors",
|
669 |
+
"model.layers.75.post_attention_layernorm.weight": "model-00058-of-00061.safetensors",
|
670 |
+
"model.layers.75.self_attn.k_proj.weight": "model-00057-of-00061.safetensors",
|
671 |
+
"model.layers.75.self_attn.o_proj.weight": "model-00057-of-00061.safetensors",
|
672 |
+
"model.layers.75.self_attn.q_proj.weight": "model-00057-of-00061.safetensors",
|
673 |
+
"model.layers.75.self_attn.v_proj.weight": "model-00057-of-00061.safetensors",
|
674 |
+
"model.layers.76.input_layernorm.weight": "model-00058-of-00061.safetensors",
|
675 |
+
"model.layers.76.mlp.down_proj.weight": "model-00058-of-00061.safetensors",
|
676 |
+
"model.layers.76.mlp.gate_proj.weight": "model-00058-of-00061.safetensors",
|
677 |
+
"model.layers.76.mlp.up_proj.weight": "model-00058-of-00061.safetensors",
|
678 |
+
"model.layers.76.post_attention_layernorm.weight": "model-00058-of-00061.safetensors",
|
679 |
+
"model.layers.76.self_attn.k_proj.weight": "model-00058-of-00061.safetensors",
|
680 |
+
"model.layers.76.self_attn.o_proj.weight": "model-00058-of-00061.safetensors",
|
681 |
+
"model.layers.76.self_attn.q_proj.weight": "model-00058-of-00061.safetensors",
|
682 |
+
"model.layers.76.self_attn.v_proj.weight": "model-00058-of-00061.safetensors",
|
683 |
+
"model.layers.77.input_layernorm.weight": "model-00059-of-00061.safetensors",
|
684 |
+
"model.layers.77.mlp.down_proj.weight": "model-00059-of-00061.safetensors",
|
685 |
+
"model.layers.77.mlp.gate_proj.weight": "model-00059-of-00061.safetensors",
|
686 |
+
"model.layers.77.mlp.up_proj.weight": "model-00059-of-00061.safetensors",
|
687 |
+
"model.layers.77.post_attention_layernorm.weight": "model-00059-of-00061.safetensors",
|
688 |
+
"model.layers.77.self_attn.k_proj.weight": "model-00058-of-00061.safetensors",
|
689 |
+
"model.layers.77.self_attn.o_proj.weight": "model-00058-of-00061.safetensors",
|
690 |
+
"model.layers.77.self_attn.q_proj.weight": "model-00058-of-00061.safetensors",
|
691 |
+
"model.layers.77.self_attn.v_proj.weight": "model-00058-of-00061.safetensors",
|
692 |
+
"model.layers.78.input_layernorm.weight": "model-00060-of-00061.safetensors",
|
693 |
+
"model.layers.78.mlp.down_proj.weight": "model-00060-of-00061.safetensors",
|
694 |
+
"model.layers.78.mlp.gate_proj.weight": "model-00059-of-00061.safetensors",
|
695 |
+
"model.layers.78.mlp.up_proj.weight": "model-00060-of-00061.safetensors",
|
696 |
+
"model.layers.78.post_attention_layernorm.weight": "model-00060-of-00061.safetensors",
|
697 |
+
"model.layers.78.self_attn.k_proj.weight": "model-00059-of-00061.safetensors",
|
698 |
+
"model.layers.78.self_attn.o_proj.weight": "model-00059-of-00061.safetensors",
|
699 |
+
"model.layers.78.self_attn.q_proj.weight": "model-00059-of-00061.safetensors",
|
700 |
+
"model.layers.78.self_attn.v_proj.weight": "model-00059-of-00061.safetensors",
|
701 |
+
"model.layers.79.input_layernorm.weight": "model-00061-of-00061.safetensors",
|
702 |
+
"model.layers.79.mlp.down_proj.weight": "model-00061-of-00061.safetensors",
|
703 |
+
"model.layers.79.mlp.gate_proj.weight": "model-00060-of-00061.safetensors",
|
704 |
+
"model.layers.79.mlp.up_proj.weight": "model-00060-of-00061.safetensors",
|
705 |
+
"model.layers.79.post_attention_layernorm.weight": "model-00061-of-00061.safetensors",
|
706 |
+
"model.layers.79.self_attn.k_proj.weight": "model-00060-of-00061.safetensors",
|
707 |
+
"model.layers.79.self_attn.o_proj.weight": "model-00060-of-00061.safetensors",
|
708 |
+
"model.layers.79.self_attn.q_proj.weight": "model-00060-of-00061.safetensors",
|
709 |
+
"model.layers.79.self_attn.v_proj.weight": "model-00060-of-00061.safetensors",
|
710 |
+
"model.layers.8.input_layernorm.weight": "model-00007-of-00061.safetensors",
|
711 |
+
"model.layers.8.mlp.down_proj.weight": "model-00007-of-00061.safetensors",
|
712 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00007-of-00061.safetensors",
|
713 |
+
"model.layers.8.mlp.up_proj.weight": "model-00007-of-00061.safetensors",
|
714 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00007-of-00061.safetensors",
|
715 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00007-of-00061.safetensors",
|
716 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00007-of-00061.safetensors",
|
717 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00007-of-00061.safetensors",
|
718 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00007-of-00061.safetensors",
|
719 |
+
"model.layers.9.input_layernorm.weight": "model-00008-of-00061.safetensors",
|
720 |
+
"model.layers.9.mlp.down_proj.weight": "model-00008-of-00061.safetensors",
|
721 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00008-of-00061.safetensors",
|
722 |
+
"model.layers.9.mlp.up_proj.weight": "model-00008-of-00061.safetensors",
|
723 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00008-of-00061.safetensors",
|
724 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00007-of-00061.safetensors",
|
725 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00007-of-00061.safetensors",
|
726 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00007-of-00061.safetensors",
|
727 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00007-of-00061.safetensors",
|
728 |
+
"model.norm.weight": "model-00061-of-00061.safetensors"
|
729 |
+
}
|
730 |
+
}
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "</s>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<unk>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
The diff for this file is too large to render.
See raw diff
|
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"bos_token": "<s>",
|
31 |
+
"clean_up_tokenization_spaces": false,
|
32 |
+
"eos_token": "</s>",
|
33 |
+
"legacy": false,
|
34 |
+
"model_max_length": 1000000000000000019884624838656,
|
35 |
+
"pad_token": "</s>",
|
36 |
+
"sp_model_kwargs": {},
|
37 |
+
"tokenizer_class": "LlamaTokenizer",
|
38 |
+
"unk_token": "<unk>",
|
39 |
+
"use_default_system_prompt": true
|
40 |
+
}
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sequelbox/SpellBlade",
|
3 |
+
"architectures": [
|
4 |
+
"LlamaForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 8192,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 28672,
|
14 |
+
"max_position_embeddings": 4096,
|
15 |
+
"model_type": "llama",
|
16 |
+
"num_attention_heads": 64,
|
17 |
+
"num_hidden_layers": 80,
|
18 |
+
"num_key_value_heads": 8,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"pretraining_tp": 1,
|
21 |
+
"rms_norm_eps": 1e-05,
|
22 |
+
"rope_scaling": null,
|
23 |
+
"rope_theta": 10000.0,
|
24 |
+
"tie_word_embeddings": false,
|
25 |
+
"torch_dtype": "float32",
|
26 |
+
"transformers_version": "4.36.2",
|
27 |
+
"use_cache": false,
|
28 |
+
"vocab_size": 32000
|
29 |
+
}
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.36.2",
|
7 |
+
"use_cache": false
|
8 |
+
}
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5352d1d26fb1a4187e6655674d3d7b9e65578dd4f88e05e3053008c53acfe878
|
3 |
+
size 4806739440
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e28499b8d3d1d6db61e3385c3736a1d8faaf891a2fc2ce89b71739f3b23a551
|
3 |
+
size 4630578440
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a69dced67f39f9f6c60816642904fba0e123102682df36a7703c4da3ed2b3478
|
3 |
+
size 4362142864
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c06846f87b3828821532d315409db90421ec463a8fb3057a2e28d5ebff4b368b
|
3 |
+
size 4966188864
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da16ef7dba13ec0c23aa5721c1c39a1793f0aac98a122942afb50bfd33ab0d12
|
3 |
+
size 4362142864
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7af94011077c5ff5cb6e41b28360c9ddecb491db47380274a61e687cea70001
|
3 |
+
size 4362142864
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54c6b0177452845eef4a53ff47787150687efe0e8ae05cf3bcdb45ae9b85ce95
|
3 |
+
size 4966188864
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b60807d26c02d8c09f84aae1e630e4c298eec45b90bc8a9bb4ff21b9886a65f0
|
3 |
+
size 4362142880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20fafa021662573a48905ec08221dd85ed0181a0088055832fa95997f92cbed6
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d86186d95a9895d1c2942dedaa390c45b8749fa7dc816461ee45fc08c764b4ec
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70fa9d2b4d1922ed02f61b527528d56444692ec3fc136998eb06eeb344194016
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e696bdf1acd81583bd846d5355bd401876398bc3b1ba8977846a1125dcaf1cc6
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0eed999502c6eeff2b1a5158b16f7db3d3121c957ec9143eea3552eef8562082
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ed50e80d456ce42bab05404bc5a13f36cd668a8acfd055cbe945647b79bdf69
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24534c62918587285be9533a4cde711864570ecc425350fbb0278a591f2c6393
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:417705ff101a6516ed3d3a018fe5968109e6505eaf88966233fc3485956a1efa
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50ac743e5437c9321360795d533c3767a4454ba373d0ab781970c5b67d871568
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35bb3eb4e4d049834b41635404827f9df27ab3c996aa9734a25da421c200c2a0
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1cedc43136ff86f562bc67c9088d54e9b8e495c0052f31e975dda03d715a0c1
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:692706f2b9e2c65accd5e8c2cafbb12447f9b97968c30ceb7645de7f70a5111f
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70f933252a5599e0388aeeaf126f2a3c14f9be83724ed8a08bad491511ec7269
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:454788a5f8ed3ca95d84b6fae013ab9832d517c1714239240e80016d7a1c4d15
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4aefb059c46a9998dfc942961bfeedc719e854ed1189c0b4102709acf297e539
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15de4c62eeee6d8c6c58039c17c9dc1d298b73e93838880c5d8242842b75b05f
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f8a8d5a10febcf110b98976ffcec9ca8cd17d1fda8d7046b6726b04728b412c
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4afd46ecebb697ade9f2a6d9323a635e43bbb1f3364c91fe3434162eaffdb8af
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c7c91daeae73cfdbcb3167604a4e002fb3803e4ed0e21a5c9cb1fde61688835
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:602add813be1febc8a4330796075ad18927aea3c5215dea0f976fd8a87cac61f
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9d2db54aecdf9bdf854cb640be346b4023ef2857abeb720297c59627a7ea8e2
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c49e0ba95fa93516bb09eb6d6d8cdc1c9c36d9f11ea24d64834e5a81817a41a9
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74afde4aa694a3f0e7568bfc5c2cc72d14e632fb0b8c1c0b52cc34131588d03e
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c553a79c4dc7527158d1ee8c63e6cfdfed7cb544247e14f8a262a869db928ac1
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e64f4c3e6c63b879ab412b1b10c81411e2a885deea883c4d1cc1392e6ad06ee6
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a7c8d6bf49315f4a15792aad78c45ad306f997fc870fa639e82c2411c361362
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d57fb6faaecc3f1ce6646bd3c1576712e7e00e82bebc099475710f1a30ed133e
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2543b012997bc2f260198166c51c328e98edd2e4a764317c43147aa6ee1834d6
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2caf2871528d1a8a5871d90208b53312e3b821f09ca8c9aa6ac7ef8e55fc6a4
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:818c322947e1675269b0e4513cc8cb8168849f46e9ce8c4bec12b57c6a5a3fbe
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0f4aae1fc83d6c5a7235e7255b5f86c28b42b1077d1237878b59bcc0ac53b86
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5ad4528388d48d8854250724ad0025e05091bb4af040186d0cd11aa508e7a90
|
3 |
+
size 4966188880
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8bf95940073d1e160e5349dbacafff6aa9b93d56ee7dc17500462c38b913dec
|
3 |
+
size 4362142872
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1f30feb65fd41e48016368c5c12a077ae2c2c01cb2690bf465288c9141eefaa
|
3 |
+
size 4362142872
|