rodrigomasini committed
Commit: 1d7c581
Parent: 229fec7

Update app.py

Files changed (1)
  1. app.py +27 -1
app.py CHANGED
@@ -1,5 +1,31 @@
  import gradio as gr
+ from transformers import AutoTokenizer, pipeline, logging
+ from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+ import argparse
+
+ quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
+
+ model_basename = "Jackson2-4bit-128g-GPTQ.safetensors"
+
+ use_strict = False
+
+ use_triton = False
+
+ tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, use_fast=True)
+
+ quantize_config = BaseQuantizeConfig(
+     bits=4,
+     group_size=128,
+     desc_act=False
+ )
+
+ model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir,
+     use_safetensors=True,
+     strict=use_strict,
+     model_basename=model_basename,
+     device="cuda:0",
+     use_triton=use_triton,
+     quantize_config=quantize_config)

- demo = gr.load("FPHam/Jackson_The_Formalizer_V2_13b_GPTQ", src="models")

  demo.launch()
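
Note that the updated app.py loads the tokenizer and quantized model but no longer assigns demo before the trailing demo.launch(), which the diff keeps as context. Below is a minimal sketch, not part of the commit, of one way to wire the loaded model into a Gradio interface using the tokenizer, model, and gr imports defined above; the generate helper, the max_new_tokens value, and the choice of gr.Interface are assumptions, not the author's code.

def generate(prompt):
    # Tokenize the prompt and move it to the same device the quantized model
    # was loaded on ("cuda:0" in the committed code).
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")
    # max_new_tokens is a placeholder value, not taken from the commit.
    output_ids = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Hypothetical wiring so demo exists before demo.launch() is called.
demo = gr.Interface(fn=generate, inputs="text", outputs="text")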