dacorvo committed
Commit 17e7257
1 Parent(s): 28f3bad

Remove llama2 7B config for 24 cores

Files changed (1):
  1. inference-cache-config/llama2.json +0 -36
inference-cache-config/llama2.json CHANGED

@@ -6,18 +6,6 @@
       "num_cores": 2,
       "auto_cast_type": "fp16"
     },
-    {
-      "batch_size": 1,
-      "sequence_length": 4096,
-      "num_cores": 8,
-      "auto_cast_type": "fp16"
-    },
-    {
-      "batch_size": 1,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    },
     {
       "batch_size": 4,
       "sequence_length": 4096,
@@ -30,47 +18,23 @@
       "num_cores": 8,
       "auto_cast_type": "fp16"
     },
-    {
-      "batch_size": 4,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    },
     {
       "batch_size": 8,
       "sequence_length": 4096,
       "num_cores": 8,
       "auto_cast_type": "fp16"
     },
-    {
-      "batch_size": 8,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    },
     {
       "batch_size": 16,
       "sequence_length": 4096,
       "num_cores": 8,
       "auto_cast_type": "fp16"
     },
-    {
-      "batch_size": 16,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    },
     {
       "batch_size": 32,
       "sequence_length": 4096,
       "num_cores": 8,
       "auto_cast_type": "fp16"
-    },
-    {
-      "batch_size": 32,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
     }
   ],
   "meta-llama/Llama-2-13b-chat-hf": [
 