bowenbaoamd committed on
Commit
c3bd407
·
verified ·
1 Parent(s): 84b299a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -2,7 +2,7 @@
2
  base_model: databricks/dbrx-instruct
3
  license: other
4
  ---
5
- # dbrx_moe_fp8_test
6
  - ## Introduction
7
  This model was created by applying [Quark](https://quark.docs.amd.com/latest/index.html) with calibration samples from the Pile dataset.
8
  - ## Quantization Strategy
@@ -18,7 +18,7 @@ export MODEL_DIR = [local model checkpoint folder] or databricks/dbrx-instruct
18
  # single GPU
19
  python3 quantize_quark.py \
20
  --model_dir $MODEL_DIR \
21
- --output_dir dbrx_moe_fp8_test \
22
  --quant_scheme w_fp8_a_fp8 \
23
  --kv_cache_dtype fp8 \
24
  --num_calib_data 128 \
@@ -27,7 +27,7 @@ python3 quantize_quark.py \
27
  # If model size is too large for single GPU, please use multi GPU instead.
28
  python3 quantize_quark.py
29
  --model_dir $MODEL_DIR \
30
- --output_dir dbrx_moe_fp8_test\
31
  --quant_scheme w_fp8_a_fp8 \
32
  --kv_cache_dtype fp8 \
33
  --num_calib_data 128 \
@@ -50,7 +50,7 @@ The quantization evaluation results are conducted in pseudo-quantization mode, w
50
  </td>
51
  <td><strong>dbrx-instruct </strong>
52
  </td>
53
- <td><strong>dbrx_moe_fp8_test(this model)</strong>
54
  </td>
55
  </tr>
56
  <tr>
 
2
  base_model: databricks/dbrx-instruct
3
  license: other
4
  ---
5
+ # dbrx-instruct-FP8-KV
6
  - ## Introduction
7
  This model was created by applying [Quark](https://quark.docs.amd.com/latest/index.html) with calibration samples from the Pile dataset.
8
  - ## Quantization Strategy
 
18
  # single GPU
19
  python3 quantize_quark.py \
20
  --model_dir $MODEL_DIR \
21
+ --output_dir dbrx-instruct-FP8-KV \
22
  --quant_scheme w_fp8_a_fp8 \
23
  --kv_cache_dtype fp8 \
24
  --num_calib_data 128 \
 
27
  # If model size is too large for single GPU, please use multi GPU instead.
28
  python3 quantize_quark.py
29
  --model_dir $MODEL_DIR \
30
+ --output_dir dbrx-instruct-FP8-KV\
31
  --quant_scheme w_fp8_a_fp8 \
32
  --kv_cache_dtype fp8 \
33
  --num_calib_data 128 \
 
50
  </td>
51
  <td><strong>dbrx-instruct </strong>
52
  </td>
53
+ <td><strong>dbrx-instruct-FP8-KV(this model)</strong>
54
  </td>
55
  </tr>
56
  <tr>