dacorvo HF staff committed on
Commit
5694f75
1 Parent(s): 89d090e

Update inference-cache-config/llama3-70b.json

Browse files
inference-cache-config/llama3-70b.json CHANGED
@@ -4,13 +4,13 @@
4
  "batch_size": 1,
5
  "sequence_length": 4096,
6
  "num_cores": 24,
7
- "auto_cast_type": "fp16"
8
  },
9
  {
10
  "batch_size": 4,
11
  "sequence_length": 4096,
12
  "num_cores": 24,
13
- "auto_cast_type": "fp16"
14
  }
15
  ]
16
  }
 
4
  "batch_size": 1,
5
  "sequence_length": 4096,
6
  "num_cores": 24,
7
+ "auto_cast_type": "bf16"
8
  },
9
  {
10
  "batch_size": 4,
11
  "sequence_length": 4096,
12
  "num_cores": 24,
13
+ "auto_cast_type": "bf16"
14
  }
15
  ]
16
  }