rodrigomasini committed
Commit b23a956
1 Parent(s): 08e3783

Update app_v3.py

Files changed (1):
  1. app_v3.py +5 -4
app_v3.py CHANGED
@@ -38,15 +38,15 @@ if torch.cuda.is_available():
 
     #tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True, legacy=False)
     tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
     model = AutoGPTQForCausalLM.from_quantized(
         pretrained_model_dir,
         model_basename=model_basename,
         use_safetensors=True,
-        device="cuda:0",
-        #use_triton=use_triton,
-        #quantize_config=None
+        device=device,
     )
 
+
     viz = torch.cuda.memory_summary()
     st.write(viz)
 
@@ -55,7 +55,8 @@ user_input = st.text_input("Input a phrase")
 prompt_template = f'USER: {user_input}\nASSISTANT:'
 
 if st.button("Generate the prompt"):
-    inputs = tokenizer(prompt_template, return_tensors='pt')
+    inputs = tokenizer(prompt_template, return_tensors='pt', max_length=512, truncation=True, padding='max_length', batch_size=4)
+    #inputs = tokenizer(prompt_template, return_tensors='pt')
     #streamer = TextStreamer(tokenizer)
     #pipe = pipeline(
     #    "text-generation",