Update app.py
app.py
CHANGED
@@ -1,104 +1,298 @@
(Removed: the previous 104-line version of app.py. Most deleted lines did not survive the page render; the fragments below are all that is recoverable.)

- #!pip install transformers accelerate datasets gradio sympy
- # Import libraries
# Install necessary libraries
# (llama-cpp-python is required by the `from llama_cpp import Llama` import below)
#!pip install -q transformers accelerate gguf llama-cpp-python datasets gradio sympy matplotlib pandas

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import matplotlib.pyplot as plt
import pandas as pd

# Define model paths
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
QUANTIZED_PRM_PATH = hf_hub_download(
    repo_id="mradermacher/Llama3.1-8B-PRM-Mistral-Data-GGUF",
    filename="Llama3.1-8B-PRM-Mistral-Data.Q4_K_S.gguf"
)
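# hf_hub_download fetches the multi-gigabyte GGUF file once and caches it
# locally (under ~/.cache/huggingface by default), so later startups reuse
# the cached path instead of re-downloading.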

device = "cuda" if torch.cuda.is_available() else "cpu"

def load_model(model_name, quantized=False, quantized_model_path=None):
    if quantized:
        # Offload every layer to the GPU when one is available (-1 = all layers)
        n_gpu_layers = -1 if torch.cuda.is_available() else 0
        model = Llama(
            model_path=quantized_model_path,
            n_ctx=2048,
            n_batch=512,
            n_gpu_layers=n_gpu_layers,
            verbose=False
        )
        # GGUF models bundle their own tokenizer, so none is returned
        return model, None
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
        return model, tokenizer

# Load models: the small instruct model via transformers, the quantized PRM via llama.cpp
llama_model, llama_tokenizer = load_model(MODEL_NAME)
prm_model, _ = load_model(None, quantized=True, quantized_model_path=QUANTIZED_PRM_PATH)

def majority_voting(model, tokenizer, prompt, num_samples=5):
    outputs = []
    if isinstance(model, Llama):
        for _ in range(num_samples):
            output = model(prompt, max_tokens=50, temperature=0.7)
            outputs.append(output["choices"][0]["text"])
    else:
        # Prepare inputs once; sampling is what makes each generation differ
        input_ids = tokenizer(prompt, return_tensors="pt", padding=True).input_ids.to(device)

        for _ in range(num_samples):
            output = model.generate(
                input_ids,
                max_new_tokens=50,
                # Without do_sample=True, generate() decodes greedily and all
                # num_samples outputs would be identical
                do_sample=True,
                temperature=0.7,
                pad_token_id=tokenizer.pad_token_id,
            )
            outputs.append(tokenizer.decode(output[0], skip_special_tokens=True))

    return {
        "outputs": outputs,
        # Majority vote: the most frequent sample wins
        "final_result": max(set(outputs), key=outputs.count)
    }
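# Sanity check of the voting rule (toy values, not model output):
#   >>> votes = ["4", "5", "4"]
#   >>> max(set(votes), key=votes.count)
#   '4'
# With free-form text, exact-match voting rarely finds duplicates; in practice
# votes are usually taken over extracted final answers rather than raw samples.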

def best_of_n(model, tokenizer, prm_model, prompt, num_samples=5):
    # NOTE: prm_model is accepted but never queried here; candidates are ranked
    # by a simple word-count proxy instead of a real PRM score
    outputs = []
    if isinstance(model, Llama):
        for _ in range(num_samples):
            output = model(prompt, max_tokens=50, temperature=0.7)
            response = output["choices"][0]["text"]
            score = len(response.split())
            outputs.append((response, score))
    else:
        input_ids = tokenizer(prompt, return_tensors="pt", padding=True).input_ids.to(device)

        for _ in range(num_samples):
            output = model.generate(
                input_ids,
                max_new_tokens=50,
                do_sample=True,
                temperature=0.7,
                pad_token_id=tokenizer.pad_token_id,
            )
            response = tokenizer.decode(output[0], skip_special_tokens=True)
            score = len(response.split())
            outputs.append((response, score))

    # Keep the highest-scoring candidate
    outputs.sort(key=lambda x: x[1], reverse=True)
    return {
        "outputs": outputs,
        "final_result": outputs[0][0]
    }
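# Sketch of what real PRM-based scoring could look like (an assumption, not
# wired in above): score a candidate by the log-probability the reward model
# assigns when continuing "<prompt>\n<response>". The exact prompt format the
# PRM expects is model-specific, so treat this as illustrative only.
def prm_score_sketch(prm, prompt, response):
    out = prm(f"{prompt}\n{response}", max_tokens=1, temperature=0.0, logprobs=1)
    lp = out["choices"][0]["logprobs"]["token_logprobs"]
    return sum(lp) / max(len(lp), 1)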

def beam_search(model, tokenizer, prompt, num_beams=5):
    if isinstance(model, Llama):
        # llama-cpp-python's completion API exposes no beam search, so this
        # branch approximates it with num_beams independent samples
        outputs = []
        for _ in range(num_beams):
            output = model(prompt, max_tokens=50, temperature=0.7)
            outputs.append(output["choices"][0]["text"])
    else:
        input_ids = tokenizer(prompt, return_tensors="pt", padding=True).input_ids.to(device)

        outputs = model.generate(
            input_ids,
            max_new_tokens=50,
            num_beams=num_beams,
            num_return_sequences=num_beams,
            pad_token_id=tokenizer.pad_token_id,
        )
        outputs = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

    return {
        "outputs": outputs,
        # generate() orders returned beams by score, so the first is the best
        "final_result": outputs[0]
    }
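# Beam search keeps the num_beams highest-probability partial sequences at each
# decoding step instead of sampling, so its output is deterministic for a given
# prompt; it serves as a low-variance baseline against the samplers above.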


def temperature_sampling(model, tokenizer, prompt, temperature=0.7, num_samples=5):
    outputs = []
    for _ in range(num_samples):
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
        # do_sample=True is required; without it generate() decodes greedily
        # and the temperature setting is ignored
        output = model.generate(input_ids, max_new_tokens=50, do_sample=True, temperature=temperature)
        outputs.append(tokenizer.decode(output[0], skip_special_tokens=True))
    return {
        "outputs": outputs,
        "final_result": outputs[0]
    }
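# Temperature divides the logits before the softmax: values below 1.0 sharpen
# the distribution toward high-probability tokens, values above 1.0 flatten it
# toward uniform randomness.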

def top_p_sampling(model, tokenizer, prompt, top_p=0.9, num_samples=5):
    outputs = []
    for _ in range(num_samples):
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
        # As above, sampling must be enabled for top_p to take effect
        output = model.generate(input_ids, max_new_tokens=50, do_sample=True, top_p=top_p)
        outputs.append(tokenizer.decode(output[0], skip_special_tokens=True))
    return {
        "outputs": outputs,
        "final_result": outputs[0]
    }
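# Top-p (nucleus) sampling restricts each step to the smallest set of tokens
# whose cumulative probability exceeds top_p, renormalizes over that set, and
# samples within it.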

def custom_strategy(prompt, flow):
    # Chain strategies: each step's final result becomes the next step's prompt
    intermediate_results = []
    for step in flow:
        strategy = step.get("strategy")
        params = step.get("params", {})
        if strategy == "majority_voting":
            result = majority_voting(llama_model, llama_tokenizer, prompt, **params)
        elif strategy == "best_of_n":
            result = best_of_n(llama_model, llama_tokenizer, prm_model, prompt, **params)
        elif strategy == "beam_search":
            result = beam_search(llama_model, llama_tokenizer, prompt, **params)
        elif strategy == "top_p_sampling":
            result = top_p_sampling(llama_model, llama_tokenizer, prompt, **params)
        else:
            continue
        intermediate_results.append({"strategy": strategy, "result": result})
        prompt = result["final_result"]
    return intermediate_results
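# Example flow (hypothetical values): vote on an answer, then refine with beams.
# custom_strategy("What is 7 * 8?", [
#     {"strategy": "majority_voting", "params": {"num_samples": 3}},
#     {"strategy": "beam_search", "params": {"num_beams": 3}},
# ])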

def compare_strategies(model, tokenizer, prm_model, prompt, num_samples=5):
    print("Running comparison...")
    strategies = {
        "Majority Voting": majority_voting(model, tokenizer, prompt, num_samples=num_samples),
        "Best-of-N": best_of_n(model, tokenizer, prm_model, prompt, num_samples=num_samples),
        "Beam Search": beam_search(model, tokenizer, prompt, num_beams=num_samples)
        #...
    }

    plt.figure(figsize=(10, 6))
    plt.bar(strategies.keys(), [len(s["outputs"]) for s in strategies.values()])
    plt.title("Strategy Comparison")
    plt.ylabel("Number of Outputs")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    df = pd.DataFrame.from_dict({
        strategy: {
            "Final Result": data["final_result"],
            "Outputs": data["outputs"]
        } for strategy, data in strategies.items()
    }, orient="index")

    return strategies, df
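# Note: plt.show() has no visible effect in a headless Space; saving the figure
# (e.g. plt.savefig("comparison.png")) and returning it through a gr.Image
# component would be one way to surface the chart in the UI.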

def test_generation():
    sample_prompt = "Explain the concept of neural networks in simple terms."
    print("Starting generation test...")
    strategies_results, results_df = compare_strategies(llama_model, llama_tokenizer, prm_model, sample_prompt, 1)
    print("\nResults DataFrame:")
    print(results_df)
    return strategies_results, results_df


##### Gradio interface
import gradio as gr

def format_outputs(outputs):
    if isinstance(outputs, list):
        return "\n\n".join([f"Output {i+1}: {out}" for i, out in enumerate(outputs)])
    return outputs

def run_single_strategy(prompt, strategy, num_samples):
    if not prompt:
        return "Please enter a prompt."

    strategies = {
        "Majority Voting": lambda: majority_voting(llama_model, llama_tokenizer, prompt, num_samples),
        "Best-of-N": lambda: best_of_n(llama_model, llama_tokenizer, prm_model, prompt, num_samples),
        "Beam Search": lambda: beam_search(llama_model, llama_tokenizer, prompt, num_beams=num_samples)
    }

    if strategy not in strategies:
        return "Invalid strategy selected."

    result = strategies[strategy]()

    formatted_output = f"""
### Final Result:
{result['final_result']}

### All Outputs:
{format_outputs(result['outputs'])}
"""
    return formatted_output

def run_all_strategies(prompt, num_samples):
    if not prompt:
        return "Please enter a prompt."

    strategies_results, results_df = compare_strategies(
        llama_model, llama_tokenizer, prm_model, prompt, num_samples
    )

    # Format the output for display
    output_text = "# Results from All Strategies\n\n"
    for strategy, results in strategies_results.items():
        output_text += f"""
## {strategy}
### Final Result:
{results['final_result']}

### All Outputs:
{format_outputs(results['outputs'])}

---
"""

    return output_text

# Create the Gradio interface
with gr.Blocks(title="Text Generation Strategies") as demo:
    gr.Markdown("# Text Generation Strategies Demo")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your prompt",
                placeholder="Type your prompt here...",
                lines=3
            )
            num_samples = gr.Slider(
                minimum=1,
                maximum=10,
                value=5,
                step=1,
                label="Number of samples/beams"
            )

            with gr.Row():
                strategy_dropdown = gr.Dropdown(
                    choices=["Majority Voting", "Best-of-N", "Beam Search"],
                    label="Select Strategy",
                    value="Majority Voting"
                )

            with gr.Row():
                single_strategy_btn = gr.Button("Run Selected Strategy")
                all_strategies_btn = gr.Button("Run All Strategies")

        with gr.Column():
            output_display = gr.Markdown(label="Results")

    # Set up event handlers
    single_strategy_btn.click(
        fn=run_single_strategy,
        inputs=[prompt_input, strategy_dropdown, num_samples],
        outputs=output_display
    )

    all_strategies_btn.click(
        fn=run_all_strategies,
        inputs=[prompt_input, num_samples],
        outputs=output_display
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch(debug=True)
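A plausible requirements.txt for this Space, inferred from the imports above (an assumption; the actual file is not part of this commit):

torch
transformers
accelerate
gradio
llama-cpp-python
huggingface_hub
matplotlib
pandas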