mgoin committed on
Commit
3e3f4f0
1 Parent(s): 4241b73

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -8
README.md CHANGED
@@ -24,14 +24,7 @@ recipe = QuantizationModifier(
24
  )
25
 
26
  # Apply quantization and save to disk in compressed-tensors format.
27
- SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic"
28
  oneshot(model=model, recipe=recipe, output_dir=SAVE_DIR)
29
  processor.save_pretrained(SAVE_DIR)
30
-
31
- # Confirm generations of the quantized model look sane.
32
- print("========== SAMPLE GENERATION ==============")
33
- input_ids = processor(text="Hello my name is", return_tensors="pt").input_ids.to("cuda")
34
- output = model.generate(input_ids, max_new_tokens=20)
35
- print(processor.decode(output[0]))
36
- print("==========================================")
37
  ```
 
24
  )
25
 
26
  # Apply quantization and save to disk in compressed-tensors format.
27
+ SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-dynamic"
28
  oneshot(model=model, recipe=recipe, output_dir=SAVE_DIR)
29
  processor.save_pretrained(SAVE_DIR)
 
 
 
 
 
 
 
30
  ```