MekkCyber committed on
Commit
7f64e83
·
1 Parent(s): 1bb9947

changing gradio version

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +1 -129
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 💻
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 4.39.0
8
  app_file: app.py
9
  pinned: false
10
 
 
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 4.27.0
8
  app_file: app.py
9
  pinned: false
10
 
app.py CHANGED
@@ -196,132 +196,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
196
 
197
 
198
  # Launch the app
199
- app.launch()
200
-
201
-
202
-
203
-
204
-
205
-
206
-
207
-
208
- from torchao.quantization import (
209
- int4_weight_only,
210
- int8_dynamic_activation_int8_weight,
211
- int8_weight_only,
212
- )
213
-
214
- # import gradio as gr
215
- # import torch
216
- # from transformers import AutoModelForCausalLM, AutoTokenizer
217
- # import torch.ao.quantization as quant
218
- # import os
219
- # from huggingface_hub import HfApi
220
- # import tempfile
221
- # import torch.utils.data as data
222
- # from torchao.quantization import quantize_
223
-
224
- # def load_calibration_dataset(tokenizer, num_samples=100):
225
- # # This is a placeholder. In a real scenario, you'd load actual data.
226
- # dummy_texts = ["This is a sample text" for _ in range(num_samples)]
227
- # encodings = tokenizer(dummy_texts, truncation=True, padding=True, return_tensors="pt")
228
- # dataset = data.TensorDataset(encodings['input_ids'], encodings['attention_mask'])
229
- # return data.DataLoader(dataset, batch_size=1)
230
-
231
- # def load_model(model_name):
232
- # print(f"Loading model: {model_name}")
233
- # model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto")
234
- # tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
235
- # return model, tokenizer
236
-
237
- # def quantize_model(model, quant_type, dtype):
238
- # print(f"Quantizing model: {quant_type} - {dtype}")
239
- # quantize_(model, _STR_TO_METHOD[dtype](group_size=128))
240
-
241
- # def save_model(model, model_name, quant_type, dtype):
242
- # print("Saving quantized model")
243
- # model.save_pretrained("medmekk/model_llama", safe_serialization=False)
244
- # with tempfile.TemporaryDirectory() as tmpdirname:
245
- # model.save_pretrained(tmpdirname)
246
-
247
- # # Create a new repo name
248
- # repo_name = f"{model_name.split('/')[-1]}-quantized-{quant_type.lower()}-{dtype}bit"
249
-
250
- # # Push to Hub
251
- # api = HfApi()
252
- # api.create_repo(repo_name, exist_ok=True)
253
- # api.upload_folder(
254
- # folder_path=tmpdirname,
255
- # repo_id=repo_name,
256
- # repo_type="model",
257
- # )
258
-
259
- # return f"https://huggingface.co/{repo_name}"
260
-
261
- # _STR_TO_METHOD = {
262
- # "int4_weight_only": int4_weight_only,
263
- # "int8_weight_only": int8_weight_only,
264
- # "int8_dynamic_activation_int8_weight": int8_dynamic_activation_int8_weight,
265
- # }
266
-
267
- # def quantize_and_save(model_name, quant_type, dtype):
268
-
269
- # model, tokenizer = load_model(model_name)
270
- # quantize_model(model, quant_type, dtype)
271
- # print(model.device)
272
- # return save_model(model, model_name, quant_type, dtype)
273
-
274
-
275
- # # Gradio interface
276
- # with gr.Blocks(theme=gr.themes.Soft()) as app:
277
- # gr.Markdown(
278
- # """
279
- # # 🚀 Model Quantization App
280
-
281
- # Quantize your favorite Hugging Face models and save them to your profile!
282
- # """
283
- # )
284
-
285
- # with gr.Row():
286
- # with gr.Column():
287
- # model_name = gr.Textbox(
288
- # label="Model Name",
289
- # placeholder="e.g., gpt2, distilgpt2",
290
- # value="meta-llama/Meta-Llama-3-8B-Instruct"
291
- # )
292
- # quant_type = gr.Dropdown(
293
- # label="Quantization Type",
294
- # choices=["Dynamic", "Static"],
295
- # value="Dynamic"
296
- # )
297
- # dtype = gr.Dropdown(
298
- # label="Data Type",
299
- # choices=["int4_weight_only", "int8_weight_only", "int8_dynamic_activation_int8_weight"],
300
- # value="int4_weight_only"
301
- # )
302
-
303
- # with gr.Column():
304
- # quantize_button = gr.Button("Quantize and Save Model", variant="primary")
305
- # output_link = gr.Textbox(label="Output", interactive=False)
306
-
307
- # gr.Markdown(
308
- # """
309
- # ## Instructions
310
- # 1. Enter the name of the Hugging Face model you want to quantize.
311
- # 2. Choose the quantization type.
312
- # 3. If using Weight Only quantization, select the number of bits.
313
- # 4. Click "Quantize and Save Model" to start the process.
314
- # 5. Once complete, you'll receive a link to the quantized model on Hugging Face.
315
-
316
- # Note: This process may take some time depending on the model size and your hardware.
317
- # """
318
- # )
319
-
320
- # quantize_button.click(
321
- # fn=quantize_and_save,
322
- # inputs=[model_name, quant_type, dtype],
323
- # outputs=[output_link]
324
- # )
325
-
326
- # # Launch the app
327
- # app.launch(share=True)
 
196
 
197
 
198
  # Launch the app
199
+ app.launch()