Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,9 @@ import torch
|
|
3 |
import transformers
|
4 |
import gradio as gr
|
5 |
from huggingface_hub import hf_hub_download
|
|
|
|
|
|
|
6 |
import safetensors
|
7 |
|
8 |
# from transformer_engine.pytorch import fp8_autocast
|
@@ -59,10 +62,15 @@ files_to_download = [
|
|
59 |
model_dir = f"./{model_id}"
|
60 |
os.makedirs(model_dir, exist_ok=True)
|
61 |
|
|
|
|
|
62 |
|
|
|
63 |
# Download each file
|
64 |
for file in files_to_download:
|
65 |
hf_hub_download(repo_id=model_id, filename=file, local_dir=model_dir, token=auth_token)
|
|
|
|
|
66 |
|
67 |
'''
|
68 |
with fp8_autocast(): # Enables FP8 computations
|
@@ -79,7 +87,8 @@ with fp8_autocast(): # Enables FP8 computations
|
|
79 |
model = transformers.AutoModelForCausalLM.from_pretrained(model_dir, quantization_config=quantization_config)
|
80 |
tokenizer = transformers.AutoTokenizer.from_pretrained(model_dir)
|
81 |
|
82 |
-
'''
|
|
|
83 |
model = model.half() # Convert to FP8-like (closest possible)
|
84 |
'''
|
85 |
|
|
|
3 |
import transformers
|
4 |
import gradio as gr
|
5 |
from huggingface_hub import hf_hub_download
|
6 |
+
|
7 |
+
from huggingface_hub import snapshot_download
|
8 |
+
|
9 |
import safetensors
|
10 |
|
11 |
# from transformer_engine.pytorch import fp8_autocast
|
|
|
62 |
model_dir = f"./{model_id}"
|
63 |
os.makedirs(model_dir, exist_ok=True)
|
64 |
|
65 |
+
snapshot_download(repo_id=model_id, ignore_patterns=".bin", token=auth_token)
|
66 |
+
|
67 |
|
68 |
+
'''
|
69 |
# Download each file
|
70 |
for file in files_to_download:
|
71 |
hf_hub_download(repo_id=model_id, filename=file, local_dir=model_dir, token=auth_token)
|
72 |
+
'''
|
73 |
+
|
74 |
|
75 |
'''
|
76 |
with fp8_autocast(): # Enables FP8 computations
|
|
|
87 |
model = transformers.AutoModelForCausalLM.from_pretrained(model_dir, quantization_config=quantization_config)
|
88 |
tokenizer = transformers.AutoTokenizer.from_pretrained(model_dir)
|
89 |
|
90 |
+
'''
|
91 |
+
model.to(dtype=torch.float16) # Load as FP16 first
|
92 |
model = model.half() # Convert to FP8-like (closest possible)
|
93 |
'''
|
94 |
|