rodrigomasini committed
Commit 4333c79
1 Parent(s): ba18e04

Update app.py

Files changed (1): app.py +1 -37
app.py CHANGED
@@ -98,40 +98,4 @@ if model_loaded:
 
     # Display GPU memory information after generation
     gpu_memory_after_generation = get_gpu_memory()
-    st.write(f"GPU Memory Info after generation: {gpu_memory_after_generation}")
-
-    tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=False)
-
-    quantize_config = BaseQuantizeConfig(
-        bits=4,
-        group_size=128,
-        desc_act=False
-    )
-
-    model = AutoGPTQForCausalLM.from_quantized(local_folder,
-                                               use_safetensors=True,
-                                               strict=use_strict,
-                                               model_basename=model_basename,
-                                               device="cuda:0",
-                                               use_triton=use_triton,
-                                               quantize_config=quantize_config)
-
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=512,
-        temperature=0.1,
-        top_p=0.95,
-        repetition_penalty=1.15
-    )
-
-    user_input = st.text_input("Input a phrase")
-
-    prompt_template = f'''USER: {user_input}
-ASSISTANT:'''
-
-    # Generate output when the "Generate" button is pressed
-    if st.button("Generate the prompt"):
-        output = pipe(prompt_template)[0]['generated_text']
-        st.text_area("Prompt", value=output)
+    st.write(f"GPU Memory Info after generation: {gpu_memory_after_generation}")
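
For context, the deleted block repeated the tokenizer, AutoGPTQ model, and pipeline setup inside the generation section, so it would run again on every Streamlit rerun. A minimal sketch of doing that setup once per session is below. It assumes, as in the removed code, that local_folder, model_basename, use_strict, and use_triton are defined earlier in app.py, and it adds Streamlit's st.cache_resource (an assumption, not part of this commit) so reruns reuse the loaded model:

```python
import streamlit as st
from transformers import AutoTokenizer, pipeline
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

@st.cache_resource  # load once; subsequent Streamlit reruns reuse the object
def load_pipeline(local_folder, model_basename, use_strict, use_triton):
    tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=False)
    quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
    model = AutoGPTQForCausalLM.from_quantized(
        local_folder,
        use_safetensors=True,
        strict=use_strict,
        model_basename=model_basename,
        device="cuda:0",
        use_triton=use_triton,
        quantize_config=quantize_config,
    )
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.1,
        top_p=0.95,
        repetition_penalty=1.15,
    )

# Usage, with the same generation flow as the removed block:
# pipe = load_pipeline(local_folder, model_basename, use_strict, use_triton)
# output = pipe(f"USER: {user_input}\nASSISTANT:")[0]['generated_text']
```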