sharpenb committed · Commit bce11e8 · verified · 1 Parent(s): 670fccc

Upload folder using huggingface_hub (#2)


- 50730b4190088945a6da9fd5035c7eaec609a30be6068c13779dbacd5eb6e1a2 (b96a20a7f15d12b9ca9c770da805544feb6a553a)
- c3bfb91dc78621aa6f5aa630042e6dc8745e61bf5e973093a8ae32c747a0b1c2 (6d694c8ba0a236dad9f3ec7c27e4fa8c08463652)
- 00cec71e51b9014539bf09b3991c3739fd170dd5ac93d7b66a05762043fa53ed (61d1ed0ac517e90dcf5461e19dedff56bf847ffa)

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/covalent/.cache/models/tmpl0dooqs33mbjpzya",
+  "_name_or_path": "/covalent/.cache/models/tmped9bmhvsju5cwczt",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -20,7 +20,7 @@
   "mlp_bias": false,
   "model_type": "llama",
   "num_attention_heads": 32,
-  "num_hidden_layers": 31,
+  "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "pad_token_id": 128004,
   "pretraining_tp": 1,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:605d9d315c745da44dc1978e6d93752af07b15696be5077a1f37ce6b6064aaaf
+oid sha256:1809ed2a03ad626cf842a3b23ebb12bcd961d046584b96a1866022d2f63e3a42
 size 4996772687
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ce79c2c29a31b7831d945d6f77eb3fe78081982bb43996f6f511b168d70bd71
-size 3871719490
+oid sha256:516fb1124ecede3b7fa433594df126871f13fe25d5ed69cf94830f9adaac99b4
+size 4090014257
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 8868413657
+    "total_size": 9086705888
   },
   "weight_map": {
     "lm_head.weight": "model-00002-of-00002.safetensors",
@@ -580,6 +580,29 @@
     "model.layers.30.self_attn.v_proj.SCB": "model-00002-of-00002.safetensors",
     "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.30.self_attn.v_proj.weight_format": "model-00002-of-00002.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.down_proj.SCB": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.down_proj.weight_format": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.gate_proj.SCB": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.gate_proj.weight_format": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.up_proj.SCB": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.up_proj.weight_format": "model-00002-of-00002.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.SCB": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.weight_format": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.o_proj.SCB": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.o_proj.weight_format": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.SCB": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.weight_format": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.SCB": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.weight_format": "model-00002-of-00002.safetensors",
     "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
     "model.layers.4.mlp.down_proj.SCB": "model-00001-of-00002.safetensors",
     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
smash_config.json CHANGED
@@ -28,7 +28,7 @@
   "quant_llm-int8_weight_bits": 8,
   "max_batch_size": 1,
   "device": "cuda",
-  "cache_dir": "/covalent/.cache/models/tmpl0dooqs3",
+  "cache_dir": "/covalent/.cache/models/tmped9bmhvs",
   "task": "",
   "save_load_fn": "bitsandbytes",
   "save_load_fn_args": {}