Chris4K committed on
Commit af8f66e · verified · 1 Parent(s): 286a33a

Update app.py

Files changed (1)
  1. app.py +282 -88
app.py CHANGED
@@ -1,104 +1,298 @@
  # Install necessary libraries
- #!pip install transformers accelerate datasets gradio sympy

- # Import libraries
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
- import gradio as gr
- import sympy

- # Load Model and Tokenizer
  MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
- PRM_NAME = "RLHFlow/Llama3.1-8B-PRM"

  device = "cuda" if torch.cuda.is_available() else "cpu"

- # Load LLaMA model
- def load_model(model_name):
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
-     return model.to(device), tokenizer

  llama_model, llama_tokenizer = load_model(MODEL_NAME)

- # Load Process Reward Model (PRM)
- prm_model, prm_tokenizer = load_model(PRM_NAME)

- # Strategies
- def majority_voting(prompt, num_samples=5):
      outputs = []
      for _ in range(num_samples):
-         input_ids = llama_tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-         output = llama_model.generate(input_ids, max_new_tokens=50)
-         outputs.append(llama_tokenizer.decode(output[0], skip_special_tokens=True))
-     # Return the most common result
-     return max(set(outputs), key=outputs.count)
-
- def best_of_n(prompt, num_samples=5):
-     scored_outputs = []
      for _ in range(num_samples):
-         input_ids = llama_tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-         output = llama_model.generate(input_ids, max_new_tokens=50)
-         response = llama_tokenizer.decode(output[0], skip_special_tokens=True)
-         score = prm_model(**prm_tokenizer(response, return_tensors="pt").to(device)).logits.mean().item()
-         scored_outputs.append((response, score))
-     # Return the highest scored response
-     return max(scored_outputs, key=lambda x: x[1])[0]
-
- def beam_search(prompt, num_beams=5):
-     input_ids = llama_tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-     outputs = llama_model.generate(input_ids, max_new_tokens=50, num_beams=num_beams, num_return_sequences=num_beams)
-     return [llama_tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
-
- def dvts(prompt, depth=3, breadth=2):
-     """
-     Simplified implementation of DVTS: generates a tree of solutions and evaluates branches using PRM.
-     """
-     results = []
-     for _ in range(breadth):
-         input_ids = llama_tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-         output = llama_model.generate(input_ids, max_new_tokens=50)
-         response = llama_tokenizer.decode(output[0], skip_special_tokens=True)
-         score = prm_model(**prm_tokenizer(response, return_tensors="pt").to(device)).logits.mean().item()
-         results.append((response, score))
-     # Select the top responses and expand them recursively
-     for _ in range(depth - 1):
-         best_responses = sorted(results, key=lambda x: x[1], reverse=True)[:breadth]
-         for response, _ in best_responses:
-             input_ids = llama_tokenizer(response, return_tensors="pt").input_ids.to(device)
-             output = llama_model.generate(input_ids, max_new_tokens=50)
-             extended_response = llama_tokenizer.decode(output[0], skip_special_tokens=True)
-             score = prm_model(**prm_tokenizer(extended_response, return_tensors="pt").to(device)).logits.mean().item()
-             results.append((extended_response, score))
-     # Return the best overall response
-     return max(results, key=lambda x: x[1])[0]
-
- # Gradio Interface
- def inference(prompt, strategy, num_samples, depth, breadth):
-     if strategy == "Majority Voting":
-         return majority_voting(prompt, num_samples)
-     elif strategy == "Best-of-N":
-         return best_of_n(prompt, num_samples)
-     elif strategy == "Beam Search":
-         return beam_search(prompt, num_samples)
-     elif strategy == "DVTS":
-         return dvts(prompt, depth, breadth)
-     else:
-         return "Invalid Strategy"
-
- gr.Interface(
-     fn=inference,
-     inputs=[
-         gr.Textbox(label="Problem Statement", placeholder="Enter your problem here"),
-         gr.Radio(
-             ["Majority Voting", "Best-of-N", "Beam Search", "DVTS"],
-             label="Inference Strategy",
-         ),
-         gr.Slider(1, 10, step=1, value=5, label="Number of Samples"),
-         gr.Slider(1, 5, step=1, value=3, label="Depth (DVTS Only)"),
-         gr.Slider(1, 5, step=1, value=2, label="Breadth (DVTS Only)"),
-     ],
-     outputs="text",
-     title="Dynamic Inference Toolkit",
-     description="Explore test-time compute scaling strategies with Meta's LLaMA model.",
- ).launch()
  # Install necessary libraries
+ #!pip install -q transformers accelerate gguf datasets gradio sympy matplotlib pandas

  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
+ import matplotlib.pyplot as plt
+ import pandas as pd

+ # Define model paths
  MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
+ QUANTIZED_PRM_PATH = hf_hub_download(
+     repo_id="mradermacher/Llama3.1-8B-PRM-Mistral-Data-GGUF",
+     filename="Llama3.1-8B-PRM-Mistral-Data.Q4_K_S.gguf"
+ )

  device = "cuda" if torch.cuda.is_available() else "cpu"

+ def load_model(model_name, quantized=False, quantized_model_path=None):
+     if quantized:
+         n_gpu_layers = -1 if torch.cuda.is_available() else 0
+         model = Llama(
+             model_path=quantized_model_path,
+             n_ctx=2048,
+             n_batch=512,
+             n_gpu_layers=n_gpu_layers,
+             verbose=False
+         )
+         return model, None
+     else:
+         tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')
+         if tokenizer.pad_token is None:
+             tokenizer.pad_token = tokenizer.eos_token
+         model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
+         return model, tokenizer

+ # Load models
  llama_model, llama_tokenizer = load_model(MODEL_NAME)
+ prm_model, _ = load_model(None, quantized=True, quantized_model_path=QUANTIZED_PRM_PATH)

+ def majority_voting(model, tokenizer, prompt, num_samples=5):
+     outputs = []
+     if isinstance(model, Llama):
+         for _ in range(num_samples):
+             output = model(prompt, max_tokens=50, temperature=0.7)
+             outputs.append(output["choices"][0]["text"])
+     else:
+         # Prepare inputs
+         input_ids = tokenizer(prompt, return_tensors="pt", padding=True).input_ids.to(device)
+
+         for _ in range(num_samples):
+             output = model.generate(
+                 input_ids,
+                 max_new_tokens=50,
+                 pad_token_id=tokenizer.pad_token_id,
+             )
+             outputs.append(tokenizer.decode(output[0], skip_special_tokens=True))
+
+     return {
+         "outputs": outputs,
+         "final_result": max(set(outputs), key=outputs.count)
+     }

+ def best_of_n(model, tokenizer, prm_model, prompt, num_samples=5):
+     outputs = []
+     if isinstance(model, Llama):
+         for _ in range(num_samples):
+             output = model(prompt, max_tokens=50, temperature=0.7)
+             response = output["choices"][0]["text"]
+             score = len(response.split())
+             outputs.append((response, score))
+     else:
+         input_ids = tokenizer(prompt, return_tensors="pt", padding=True).input_ids.to(device)
+
+         for _ in range(num_samples):
+             output = model.generate(
+                 input_ids,
+                 max_new_tokens=50,
+                 pad_token_id=tokenizer.pad_token_id,
+             )
+             response = tokenizer.decode(output[0], skip_special_tokens=True)
+             score = len(response.split())
+             outputs.append((response, score))
+
+     outputs.sort(key=lambda x: x[1], reverse=True)
+     return {
+         "outputs": outputs,
+         "final_result": outputs[0][0]
+     }
+
+ def beam_search(model, tokenizer, prompt, num_beams=5):
+     if isinstance(model, Llama):
+         outputs = []
+         for _ in range(num_beams):
+             output = model(prompt, max_tokens=50, temperature=0.7)
+             outputs.append(output["choices"][0]["text"])
+     else:
+         input_ids = tokenizer(prompt, return_tensors="pt", padding=True).input_ids.to(device)
+
+         outputs = model.generate(
+             input_ids,
+             max_new_tokens=50,
+             num_beams=num_beams,
+             num_return_sequences=num_beams,
+             pad_token_id=tokenizer.pad_token_id,
+         )
+         outputs = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
+
+     return {
+         "outputs": outputs,
+         "final_result": outputs[0]
+     }
+
+
+ def temperature_sampling(model, tokenizer, prompt, temperature=0.7, num_samples=5):
      outputs = []
      for _ in range(num_samples):
+         input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+         output = model.generate(input_ids, max_new_tokens=50, temperature=temperature)
+         outputs.append(tokenizer.decode(output[0], skip_special_tokens=True))
+     return {
+         "outputs": outputs,
+         "final_result": outputs[0]
+     }
+
+ def top_p_sampling(model, tokenizer, prompt, top_p=0.9, num_samples=5):
+     outputs = []
      for _ in range(num_samples):
+         input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+         output = model.generate(input_ids, max_new_tokens=50, top_p=top_p)
+         outputs.append(tokenizer.decode(output[0], skip_special_tokens=True))
+     return {
+         "outputs": outputs,
+         "final_result": outputs[0]
+     }
+
+ def custom_strategy(prompt, flow):
+     intermediate_results = []
+     for step in flow:
+         strategy = step.get("strategy")
+         params = step.get("params", {})
+         if strategy == "majority_voting":
+             result = majority_voting(prompt, **params)
+         elif strategy == "best_of_n":
+             result = best_of_n(prompt, **params)
+         elif strategy == "beam_search":
+             result = beam_search(prompt, **params)
+         elif strategy == "top_p_sampling":
+             result = top_p_sampling(prompt, **params)
+         else:
+             continue
+         intermediate_results.append({"strategy": strategy, "result": result})
+         prompt = result["final_result"]
+     return intermediate_results
+
+ def compare_strategies(model, tokenizer, prm_model, prompt, num_samples=5):
+     print("Running comparison...")
+     strategies = {
+         "Majority Voting": majority_voting(model, tokenizer, prompt, num_samples=5),
+         "Best-of-N": best_of_n(model, tokenizer, prm_model, prompt, num_samples=5),
+         "Beam Search": beam_search(model, tokenizer, prompt, num_beams=5)
+         #...
+     }
+
+     plt.figure(figsize=(10, 6))
+     plt.bar(strategies.keys(), [len(s["outputs"]) for s in strategies.values()])
+     plt.title("Strategy Comparison")
+     plt.ylabel("Number of Outputs")
+     plt.xticks(rotation=45)
+     plt.tight_layout()
+     plt.show()
+
+     df = pd.DataFrame.from_dict({
+         strategy: {
+             "Final Result": data["final_result"],
+             "Outputs": data["outputs"]
+         } for strategy, data in strategies.items()
+     }, orient="index")
+
+     return strategies, df
+
+ def test_generation():
+     sample_prompt = "Explain the concept of neural networks in simple terms."
+     print("Starting generation test...")
+     strategies_results, results_df = compare_strategies(llama_model, llama_tokenizer, prm_model, sample_prompt, 1)
+     print("\nResults DataFrame:")
+     print(results_df)
+     return strategies_results, results_df
+
+
+ #####
+ import gradio as gr
+ import pandas as pd
+ import json
+
+ def format_outputs(outputs):
+     if isinstance(outputs, list):
+         return "\n\n".join([f"Output {i+1}: {out}" for i, out in enumerate(outputs)])
+     return outputs
+
+ def run_single_strategy(prompt, strategy, num_samples):
+     if not prompt:
+         return "Please enter a prompt."
+
+     strategies = {
+         "Majority Voting": lambda: majority_voting(llama_model, llama_tokenizer, prompt, num_samples),
+         "Best-of-N": lambda: best_of_n(llama_model, llama_tokenizer, prm_model, prompt, num_samples),
+         "Beam Search": lambda: beam_search(llama_model, llama_tokenizer, prompt, num_beams=num_samples)
+     }
+
+     if strategy not in strategies:
+         return "Invalid strategy selected."
+
+     result = strategies[strategy]()
+
+     formatted_output = f"""
+ ### Final Result:
+ {result['final_result']}
+
+ ### All Outputs:
+ {format_outputs(result['outputs'])}
+ """
+     return formatted_output
+
+ def run_all_strategies(prompt, num_samples):
+     if not prompt:
+         return "Please enter a prompt."
+
+     strategies_results, results_df = compare_strategies(
+         llama_model, llama_tokenizer, prm_model, prompt, num_samples
+     )
+
+     # Format the output for display
+     output_text = "# Results from All Strategies\n\n"
+     for strategy, results in strategies_results.items():
+         output_text += f"""
+ ## {strategy}
+ ### Final Result:
+ {results['final_result']}
+
+ ### All Outputs:
+ {format_outputs(results['outputs'])}
+
+ ---
+ """
+
+     return output_text
+
+ # Create the Gradio interface
+ with gr.Blocks(title="Text Generation Strategies") as demo:
+     gr.Markdown("# Text Generation Strategies Demo")
+
+     with gr.Row():
+         with gr.Column():
+             prompt_input = gr.Textbox(
+                 label="Enter your prompt",
+                 placeholder="Type your prompt here...",
+                 lines=3
+             )
+             num_samples = gr.Slider(
+                 minimum=1,
+                 maximum=10,
+                 value=5,
+                 step=1,
+                 label="Number of samples/beams"
+             )
+
+             with gr.Row():
+                 strategy_dropdown = gr.Dropdown(
+                     choices=["Majority Voting", "Best-of-N", "Beam Search"],
+                     label="Select Strategy",
+                     value="Majority Voting"
+                 )
+
+             with gr.Row():
+                 single_strategy_btn = gr.Button("Run Selected Strategy")
+                 all_strategies_btn = gr.Button("Run All Strategies")
+
+         with gr.Column():
+             output_display = gr.Markdown(label="Results")
+
+     # Set up event handlers
+     single_strategy_btn.click(
+         fn=run_single_strategy,
+         inputs=[prompt_input, strategy_dropdown, num_samples],
+         outputs=output_display
+     )
+
+     all_strategies_btn.click(
+         fn=run_all_strategies,
+         inputs=[prompt_input, num_samples],
+         outputs=output_display
+     )
+
+ # Launch the interface
+ if __name__ == "__main__":
+     demo.launch(debug=True)
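
Review note on this revision: as committed, custom_strategy still calls the strategy helpers with their old single-argument signatures (majority_voting(prompt, **params) and so on), while the new definitions take (model, tokenizer, prompt, ...), so every branch would raise a TypeError if a flow were actually run. Relatedly, best_of_n accepts prm_model but ranks candidates by len(response.split()), so the quantized PRM that is downloaded and loaded is never consulted for scoring. A minimal sketch of the signature plumbing follows, assuming the definitions in this commit; the threaded model/tokenizer/prm_model parameters are a suggested fix, not code from the commit:

def custom_strategy(model, tokenizer, prm_model, prompt, flow):
    # Sketch only: thread the model handles through so each call matches
    # the signatures defined in this commit.
    intermediate_results = []
    for step in flow:
        strategy = step.get("strategy")
        params = step.get("params", {})
        if strategy == "majority_voting":
            result = majority_voting(model, tokenizer, prompt, **params)
        elif strategy == "best_of_n":
            result = best_of_n(model, tokenizer, prm_model, prompt, **params)
        elif strategy == "beam_search":
            result = beam_search(model, tokenizer, prompt, **params)
        elif strategy == "top_p_sampling":
            result = top_p_sampling(model, tokenizer, prompt, **params)
        else:
            continue  # unknown strategy names are skipped, as in the commit
        intermediate_results.append({"strategy": strategy, "result": result})
        prompt = result["final_result"]  # chain each step's result into the next
    return intermediate_results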
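A second caveat: temperature_sampling and top_p_sampling call model.generate(...) without do_sample=True, so transformers decodes greedily and ignores temperature/top_p; the repeated generate calls in majority_voting share the problem, which makes every "sample" identical and the vote degenerate. A sketch of a sampling helper that actually varies across draws (do_sample=True is the standard transformers switch; everything else mirrors the committed helpers):

import torch

def sample_with_temperature(model, tokenizer, prompt, temperature=0.7, num_samples=5):
    # Sketch: without do_sample=True, generate() is deterministic and
    # temperature/top_p have no effect.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    outputs = []
    for _ in range(num_samples):
        output = model.generate(
            input_ids,
            max_new_tokens=50,
            do_sample=True,          # enable stochastic decoding
            temperature=temperature,
            pad_token_id=tokenizer.pad_token_id,
        )
        outputs.append(tokenizer.decode(output[0], skip_special_tokens=True))
    return {"outputs": outputs, "final_result": outputs[0]}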
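Finally, if the GGUF PRM is meant to score candidates, one possible shape for that call is sketched below. This is heavily hedged: prompting the model for a "+"/"-" judgment is how RLHFlow-style PRMs are typically queried, but the exact template should be verified against the model card, and the logprobs layout assumes the OpenAI-style completion dict that llama-cpp-python returns.

def prm_score(prm_model, question, answer):
    # Hypothetical scoring helper; the prompt template and response field
    # layout are assumptions, not taken from this commit.
    prompt = f"{question}\n{answer}\nIs this answer correct? Reply + or -:"
    out = prm_model(prompt, max_tokens=1, temperature=0.0, logprobs=10)
    top = out["choices"][0]["logprobs"]["top_logprobs"][0]  # token -> logprob
    return top.get("+", float("-inf"))  # higher means judged more likely correct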