choukrani committed on
Commit
20295a8
·
verified ·
1 Parent(s): f52f1a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -3,6 +3,9 @@ import torch
3
  import transformers
4
  import gradio as gr
5
  from huggingface_hub import hf_hub_download
 
 
 
6
  import safetensors
7
 
8
  # from transformer_engine.pytorch import fp8_autocast
@@ -59,10 +62,15 @@ files_to_download = [
59
  model_dir = f"./{model_id}"
60
  os.makedirs(model_dir, exist_ok=True)
61
 
 
 
62
 
 
63
  # Download each file
64
  for file in files_to_download:
65
  hf_hub_download(repo_id=model_id, filename=file, local_dir=model_dir, token=auth_token)
 
 
66
 
67
  '''
68
  with fp8_autocast(): # Enables FP8 computations
@@ -79,7 +87,8 @@ with fp8_autocast(): # Enables FP8 computations
79
  model = transformers.AutoModelForCausalLM.from_pretrained(model_dir, quantization_config=quantization_config)
80
  tokenizer = transformers.AutoTokenizer.from_pretrained(model_dir)
81
 
82
- '''model.to(dtype=torch.float16) # Load as FP16 first
 
83
  model = model.half() # Convert to FP8-like (closest possible)
84
  '''
85
 
 
3
  import transformers
4
  import gradio as gr
5
  from huggingface_hub import hf_hub_download
6
+
7
+ from huggingface_hub import snapshot_download
8
+
9
  import safetensors
10
 
11
  # from transformer_engine.pytorch import fp8_autocast
 
62
  model_dir = f"./{model_id}"
63
  os.makedirs(model_dir, exist_ok=True)
64
 
65
+ snapshot_download(repo_id=model_id, ignore_patterns=".bin", token=auth_token)
66
+
67
 
68
+ '''
69
  # Download each file
70
  for file in files_to_download:
71
  hf_hub_download(repo_id=model_id, filename=file, local_dir=model_dir, token=auth_token)
72
+ '''
73
+
74
 
75
  '''
76
  with fp8_autocast(): # Enables FP8 computations
 
87
  model = transformers.AutoModelForCausalLM.from_pretrained(model_dir, quantization_config=quantization_config)
88
  tokenizer = transformers.AutoTokenizer.from_pretrained(model_dir)
89
 
90
+ '''
91
+ model.to(dtype=torch.float16) # Load as FP16 first
92
  model = model.half() # Convert to FP8-like (closest possible)
93
  '''
94