Upload folder using huggingface_hub (#2)
Browse files
- 50730b4190088945a6da9fd5035c7eaec609a30be6068c13779dbacd5eb6e1a2 (b96a20a7f15d12b9ca9c770da805544feb6a553a)
- c3bfb91dc78621aa6f5aa630042e6dc8745e61bf5e973093a8ae32c747a0b1c2 (6d694c8ba0a236dad9f3ec7c27e4fa8c08463652)
- 00cec71e51b9014539bf09b3991c3739fd170dd5ac93d7b66a05762043fa53ed (61d1ed0ac517e90dcf5461e19dedff56bf847ffa)
- config.json +2 -2
- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +2 -2
- model.safetensors.index.json +24 -1
- smash_config.json +1 -1
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/covalent/.cache/models/
|
3 |
"architectures": [
|
4 |
"LlamaForCausalLM"
|
5 |
],
|
@@ -20,7 +20,7 @@
|
|
20 |
"mlp_bias": false,
|
21 |
"model_type": "llama",
|
22 |
"num_attention_heads": 32,
|
23 |
-
"num_hidden_layers":
|
24 |
"num_key_value_heads": 8,
|
25 |
"pad_token_id": 128004,
|
26 |
"pretraining_tp": 1,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/covalent/.cache/models/tmped9bmhvsju5cwczt",
|
3 |
"architectures": [
|
4 |
"LlamaForCausalLM"
|
5 |
],
|
|
|
20 |
"mlp_bias": false,
|
21 |
"model_type": "llama",
|
22 |
"num_attention_heads": 32,
|
23 |
+
"num_hidden_layers": 32,
|
24 |
"num_key_value_heads": 8,
|
25 |
"pad_token_id": 128004,
|
26 |
"pretraining_tp": 1,
|
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4996772687
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1809ed2a03ad626cf842a3b23ebb12bcd961d046584b96a1866022d2f63e3a42
|
3 |
size 4996772687
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:516fb1124ecede3b7fa433594df126871f13fe25d5ed69cf94830f9adaac99b4
|
3 |
+
size 4090014257
|
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00002-of-00002.safetensors",
|
@@ -580,6 +580,29 @@
|
|
580 |
"model.layers.30.self_attn.v_proj.SCB": "model-00002-of-00002.safetensors",
|
581 |
"model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
582 |
"model.layers.30.self_attn.v_proj.weight_format": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
583 |
"model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
584 |
"model.layers.4.mlp.down_proj.SCB": "model-00001-of-00002.safetensors",
|
585 |
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 9086705888
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00002-of-00002.safetensors",
|
|
|
580 |
"model.layers.30.self_attn.v_proj.SCB": "model-00002-of-00002.safetensors",
|
581 |
"model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
582 |
"model.layers.30.self_attn.v_proj.weight_format": "model-00002-of-00002.safetensors",
|
583 |
+
"model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
584 |
+
"model.layers.31.mlp.down_proj.SCB": "model-00002-of-00002.safetensors",
|
585 |
+
"model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
586 |
+
"model.layers.31.mlp.down_proj.weight_format": "model-00002-of-00002.safetensors",
|
587 |
+
"model.layers.31.mlp.gate_proj.SCB": "model-00002-of-00002.safetensors",
|
588 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
589 |
+
"model.layers.31.mlp.gate_proj.weight_format": "model-00002-of-00002.safetensors",
|
590 |
+
"model.layers.31.mlp.up_proj.SCB": "model-00002-of-00002.safetensors",
|
591 |
+
"model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
592 |
+
"model.layers.31.mlp.up_proj.weight_format": "model-00002-of-00002.safetensors",
|
593 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
594 |
+
"model.layers.31.self_attn.k_proj.SCB": "model-00002-of-00002.safetensors",
|
595 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
596 |
+
"model.layers.31.self_attn.k_proj.weight_format": "model-00002-of-00002.safetensors",
|
597 |
+
"model.layers.31.self_attn.o_proj.SCB": "model-00002-of-00002.safetensors",
|
598 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
599 |
+
"model.layers.31.self_attn.o_proj.weight_format": "model-00002-of-00002.safetensors",
|
600 |
+
"model.layers.31.self_attn.q_proj.SCB": "model-00002-of-00002.safetensors",
|
601 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
602 |
+
"model.layers.31.self_attn.q_proj.weight_format": "model-00002-of-00002.safetensors",
|
603 |
+
"model.layers.31.self_attn.v_proj.SCB": "model-00002-of-00002.safetensors",
|
604 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
605 |
+
"model.layers.31.self_attn.v_proj.weight_format": "model-00002-of-00002.safetensors",
|
606 |
"model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
607 |
"model.layers.4.mlp.down_proj.SCB": "model-00001-of-00002.safetensors",
|
608 |
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
smash_config.json
CHANGED
@@ -28,7 +28,7 @@
|
|
28 |
"quant_llm-int8_weight_bits": 8,
|
29 |
"max_batch_size": 1,
|
30 |
"device": "cuda",
|
31 |
-
"cache_dir": "/covalent/.cache/models/
|
32 |
"task": "",
|
33 |
"save_load_fn": "bitsandbytes",
|
34 |
"save_load_fn_args": {}
|
|
|
28 |
"quant_llm-int8_weight_bits": 8,
|
29 |
"max_batch_size": 1,
|
30 |
"device": "cuda",
|
31 |
+
"cache_dir": "/covalent/.cache/models/tmped9bmhvs",
|
32 |
"task": "",
|
33 |
"save_load_fn": "bitsandbytes",
|
34 |
"save_load_fn_args": {}
|