dacorvo HF staff committed on
Commit
e7179a3
1 Parent(s): be28bda

Update inference-cache-config/llama-variants.json

Browse files
inference-cache-config/llama-variants.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "defog/sqlcoder-7b-2": [
3
  {
4
  "batch_size": 1,
5
  "sequence_length": 4096,
@@ -13,7 +13,7 @@
13
  "auto_cast_type": "fp16"
14
  }
15
  ],
16
- "m-a-p/OpenCodeInterpreter-DS-6.7B": [
17
  {
18
  "batch_size": 1,
19
  "sequence_length": 4096,
@@ -27,37 +27,37 @@
27
  "auto_cast_type": "fp16"
28
  }
29
  ],
30
- "ibm/labradorite-13b": [
31
  {
32
  "batch_size": 1,
33
  "sequence_length": 4096,
34
- "num_cores": 8,
35
  "auto_cast_type": "fp16"
36
  },
37
  {
38
  "batch_size": 4,
39
  "sequence_length": 4096,
40
- "num_cores": 8,
41
- "auto_cast_type": "fp16"
42
- },
43
- {
44
- "batch_size": 8,
45
- "sequence_length": 4096,
46
- "num_cores": 8,
47
  "auto_cast_type": "fp16"
48
  }
49
  ],
50
- "abacusai/Smaug-72B-v0.1": [
51
  {
52
  "batch_size": 1,
53
  "sequence_length": 4096,
54
- "num_cores": 24,
55
  "auto_cast_type": "fp16"
56
  },
57
  {
58
  "batch_size": 4,
59
  "sequence_length": 4096,
60
- "num_cores": 24,
 
 
 
 
 
 
61
  "auto_cast_type": "fp16"
62
  }
63
  ],
@@ -103,21 +103,21 @@
103
  "auto_cast_type": "fp16"
104
  }
105
  ],
106
- "princeton-nlp/Sheared-LLaMA-1.3B": [
107
  {
108
  "batch_size": 1,
109
  "sequence_length": 4096,
110
- "num_cores": 2,
111
  "auto_cast_type": "fp16"
112
  },
113
  {
114
  "batch_size": 4,
115
  "sequence_length": 4096,
116
- "num_cores": 2,
117
  "auto_cast_type": "fp16"
118
  }
119
  ],
120
- "01-ai/Yi-34B-200K": [
121
  {
122
  "batch_size": 1,
123
  "sequence_length": 4096,
 
1
  {
2
+ "princeton-nlp/Sheared-LLaMA-1.3B": [
3
  {
4
  "batch_size": 1,
5
  "sequence_length": 4096,
 
13
  "auto_cast_type": "fp16"
14
  }
15
  ],
16
+ "defog/sqlcoder-7b-2": [
17
  {
18
  "batch_size": 1,
19
  "sequence_length": 4096,
 
27
  "auto_cast_type": "fp16"
28
  }
29
  ],
30
+ "m-a-p/OpenCodeInterpreter-DS-6.7B": [
31
  {
32
  "batch_size": 1,
33
  "sequence_length": 4096,
34
+ "num_cores": 2,
35
  "auto_cast_type": "fp16"
36
  },
37
  {
38
  "batch_size": 4,
39
  "sequence_length": 4096,
40
+ "num_cores": 2,
 
 
 
 
 
 
41
  "auto_cast_type": "fp16"
42
  }
43
  ],
44
+ "ibm/labradorite-13b": [
45
  {
46
  "batch_size": 1,
47
  "sequence_length": 4096,
48
+ "num_cores": 8,
49
  "auto_cast_type": "fp16"
50
  },
51
  {
52
  "batch_size": 4,
53
  "sequence_length": 4096,
54
+ "num_cores": 8,
55
+ "auto_cast_type": "fp16"
56
+ },
57
+ {
58
+ "batch_size": 8,
59
+ "sequence_length": 4096,
60
+ "num_cores": 8,
61
  "auto_cast_type": "fp16"
62
  }
63
  ],
 
103
  "auto_cast_type": "fp16"
104
  }
105
  ],
106
+ "01-ai/Yi-34B-200K": [
107
  {
108
  "batch_size": 1,
109
  "sequence_length": 4096,
110
+ "num_cores": 24,
111
  "auto_cast_type": "fp16"
112
  },
113
  {
114
  "batch_size": 4,
115
  "sequence_length": 4096,
116
+ "num_cores": 24,
117
  "auto_cast_type": "fp16"
118
  }
119
  ],
120
+ "abacusai/Smaug-72B-v0.1": [
121
  {
122
  "batch_size": 1,
123
  "sequence_length": 4096,