rodrigomasini committed on
Commit 5f5b726
1 Parent(s): 2280c62

Update app_v2.py

Files changed (1)
app_v2.py +5 -6
app_v2.py CHANGED
@@ -1,6 +1,7 @@
 import streamlit as st
 from transformers import AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+from huggingface_hub import snapshot_download
 import os
 
 # Define pretrained and quantized model directories
@@ -10,29 +11,27 @@ quantized_model_dir = "./Jackson2-4bit-128g-GPTQ"
 # Create the cache directory if it doesn't exist
 os.makedirs(quantized_model_dir, exist_ok=True)
 
+snapshot_download(repo_id=pretrained_model_dir, local_dir=quantized_model_dir, local_dir_use_symlinks=False)
+
 # Quantization configuration
 quantize_config = BaseQuantizeConfig(bits=4, group_size=128, damp_percent=0.01, desc_act=False)
 
 # Load the model using from_quantized
 model = AutoGPTQForCausalLM.from_quantized(
-    pretrained_model_dir,
+    quantized_model_dir,
     use_safetensors=True,
     strict=False,
-    model_basename='Jackson2-4bit-128g-GPTQ',
     device="cuda:0",
     trust_remote_code=True,
     use_triton=False,
     quantize_config=quantize_config
 )
 
-model.save_quantized(quantized_model_dir)
-
+#model.save_quantized(quantized_model_dir)
 
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, use_fast=True)
 
-model_for_inference = AutoGPTQForCausalLM.from_pretrained(quantized_model_dir)
-
 # Starting Streamlit app
 st.title("AutoGPTQ Streamlit App")
 
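For context, a minimal sketch (not part of the commit) of how the new download-then-load path can be smoke-tested before starting the Streamlit app. It assumes the AutoGPTQ version in use can locate the quantized weight file on its own once model_basename is omitted; the repo id below is a placeholder, since the real pretrained_model_dir value is defined earlier in app_v2.py and does not appear in this diff.

import os
from huggingface_hub import snapshot_download

quantized_model_dir = "./Jackson2-4bit-128g-GPTQ"
os.makedirs(quantized_model_dir, exist_ok=True)

# Placeholder repo id; the real pretrained_model_dir is set earlier in app_v2.py.
snapshot_download(
    repo_id="some-user/Jackson2-4bit-128g-GPTQ",
    local_dir=quantized_model_dir,
    local_dir_use_symlinks=False,  # copy real files rather than symlinks into the HF cache
)

# With model_basename removed, from_quantized() must find the quantized shard
# itself, so verify that exactly one .safetensors file landed on disk.
weights = [f for f in os.listdir(quantized_model_dir) if f.endswith(".safetensors")]
assert len(weights) == 1, f"expected one quantized shard, found {weights}"
print("quantized weights present:", weights[0])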