adamo1139 committed on
Commit
8753b02
1 Parent(s): 13aa812

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +10 -5
README.md CHANGED
@@ -75,18 +75,23 @@ quant_stage:
75
  weights:
76
  num_bits: 8
77
  type: float
78
- strategy: tensor
79
  dynamic: false
80
  symmetric: true
81
  input_activations:
82
  num_bits: 8
83
  type: float
84
- strategy: tensor
85
- dynamic: false
86
  symmetric: true
87
  targets: ["Linear"]
 
 
 
 
 
 
88
  """
89
-
90
  model_stub = "NousResearch/Hermes-3-Llama-3.1-8B"
91
  model_name = model_stub.split("/")[-1]
92
 
@@ -99,7 +104,7 @@ model = SparseAutoModelForCausalLM.from_pretrained(
99
  )
100
  tokenizer = AutoTokenizer.from_pretrained(model_stub)
101
 
102
- output_dir = f"./{model_name}-FP8"
103
 
104
  DATASET_ID = "HuggingFaceH4/ultrachat_200k"
105
  DATASET_SPLIT = "train_sft"
 
75
  weights:
76
  num_bits: 8
77
  type: float
78
+ strategy: channel
79
  dynamic: false
80
  symmetric: true
81
  input_activations:
82
  num_bits: 8
83
  type: float
84
+ strategy: token
85
+ dynamic: true
86
  symmetric: true
87
  targets: ["Linear"]
88
+ kv_cache_scheme:
89
+ num_bits: 8
90
+ type: float
91
+ strategy: tensor
92
+ dynamic: false
93
+ symmetric: true
94
  """
 
95
  model_stub = "NousResearch/Hermes-3-Llama-3.1-8B"
96
  model_name = model_stub.split("/")[-1]
97
 
 
104
  )
105
  tokenizer = AutoTokenizer.from_pretrained(model_stub)
106
 
107
+ output_dir = f"./{model_name}-Dynamic-FP8-KV"
108
 
109
  DATASET_ID = "HuggingFaceH4/ultrachat_200k"
110
  DATASET_SPLIT = "train_sft"