lupantech committed 3e4fa1f · 1 Parent(s): 9ab860d

polished ui

Files changed (3):
  1. .gitignore +1 -0
  2. app_v1_0215.py +0 -307
  3. app_v2_0216.py +0 -371
.gitignore CHANGED
@@ -175,3 +175,4 @@ detected_objects/
 
 # [Gradio]
 demo_solver_cache/
+backups/
app_v1_0215.py DELETED
@@ -1,307 +0,0 @@
-import os
-import sys
-import json
-import argparse
-import time
-import io
-import uuid
-from PIL import Image
-from typing import List, Dict, Any, Iterator
-import gradio as gr
-
-# Add the project root to the Python path
-current_dir = os.path.dirname(os.path.abspath(__file__))
-project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
-sys.path.insert(0, project_root)
-
-from opentools.models.initializer import Initializer
-from opentools.models.planner import Planner
-from opentools.models.memory import Memory
-from opentools.models.executor import Executor
-from opentools.models.utlis import make_json_serializable
-
-solver = None
-
-class ChatMessage:
-    def __init__(self, role: str, content: str, metadata: dict = None):
-        self.role = role
-        self.content = content
-        self.metadata = metadata or {}
-
-class Solver:
-    def __init__(
-        self,
-        planner,
-        memory,
-        executor,
-        task: str,
-        task_description: str,
-        output_types: str = "base,final,direct",
-        index: int = 0,
-        verbose: bool = True,
-        max_steps: int = 10,
-        max_time: int = 60,
-        output_json_dir: str = "results",
-        root_cache_dir: str = "cache"
-    ):
-        self.planner = planner
-        self.memory = memory
-        self.executor = executor
-        self.task = task
-        self.task_description = task_description
-        self.index = index
-        self.verbose = verbose
-        self.max_steps = max_steps
-        self.max_time = max_time
-        self.output_json_dir = output_json_dir
-        self.root_cache_dir = root_cache_dir
-
-        self.output_types = output_types.lower().split(',')
-        assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."
-
-        # self.benchmark_data = self.load_benchmark_data()
-
-
-
-    def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
-        """
-        Streams intermediate thoughts and final responses for the problem-solving process based on user input.
-
-        Args:
-            user_query (str): The text query input from the user.
-            user_image (Image.Image): The uploaded image from the user (PIL Image object).
-            messages (list): A list of ChatMessage objects to store the streamed responses.
-        """
-
-        if user_image:
-            # # Convert PIL Image to bytes (for processing)
-            # img_bytes_io = io.BytesIO()
-            # user_image.save(img_bytes_io, format="PNG") # Convert image to PNG bytes
-            # img_bytes = img_bytes_io.getvalue() # Get bytes
-
-            # Use image paths instead of bytes,
-            os.makedirs(os.path.join(self.root_cache_dir, 'images'), exist_ok=True)
-            img_path = os.path.join(self.root_cache_dir, 'images', str(uuid.uuid4()) + '.jpg')
-            user_image.save(img_path)
-        else:
-            img_path = None
-
-        # Set query cache
-        _cache_dir = os.path.join(self.root_cache_dir)
-        self.executor.set_query_cache_dir(_cache_dir)
-
-        # Step 1: Display the received inputs
-        if user_image:
-            messages.append(ChatMessage(role="assistant", content=f"📝 Received Query: {user_query}\n🖼️ Image Uploaded"))
-        else:
-            messages.append(ChatMessage(role="assistant", content=f"📝 Received Query: {user_query}"))
-        yield messages
-
-        # Step 2: Add "thinking" status while processing
-        messages.append(ChatMessage(
-            role="assistant",
-            content="",
-            metadata={"title": "⏳ Thinking: Processing input..."}
-        ))
-
-        # Step 3: Initialize problem-solving state
-        start_time = time.time()
-        step_count = 0
-        json_data = {"query": user_query, "image": "Image received as bytes"}
-
-        # Step 4: Query Analysis
-        query_analysis = self.planner.analyze_query(user_query, img_path)
-        json_data["query_analysis"] = query_analysis
-        messages.append(ChatMessage(role="assistant", content=f"🔍 Query Analysis:\n{query_analysis}"))
-        yield messages
-
-        # Step 5: Execution loop (similar to your step-by-step solver)
-        while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
-            step_count += 1
-            messages.append(ChatMessage(role="assistant", content=f"🔄 Step {step_count}: Generating next step..."))
-            yield messages
-
-            # Generate the next step
-            next_step = self.planner.generate_next_step(
-                user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
-            )
-            context, sub_goal, tool_name = self.planner.extract_context_subgoal_and_tool(next_step)
-
-            # Display the step information
-            messages.append(ChatMessage(
-                role="assistant",
-                content=f"📌 Step {step_count} Details:\n- Context: {context}\n- Sub-goal: {sub_goal}\n- Tool: {tool_name}"
-            ))
-            yield messages
-
-            # Handle tool execution or errors
-            if tool_name not in self.planner.available_tools:
-                messages.append(ChatMessage(role="assistant", content=f"⚠️ Error: Tool '{tool_name}' is not available."))
-                yield messages
-                continue
-
-            # Execute the tool command
-            tool_command = self.executor.generate_tool_command(
-                user_query, img_path, context, sub_goal, tool_name, self.planner.toolbox_metadata[tool_name]
-            )
-            explanation, command = self.executor.extract_explanation_and_command(tool_command)
-            result = self.executor.execute_tool_command(tool_name, command)
-            result = make_json_serializable(result)
-
-            messages.append(ChatMessage(role="assistant", content=f"✅ Step {step_count} Result:\n{json.dumps(result, indent=4)}"))
-            yield messages
-
-            # Step 6: Memory update and stopping condition
-            self.memory.add_action(step_count, tool_name, sub_goal, tool_command, result)
-            stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
-            conclusion = self.planner.extract_conclusion(stop_verification)
-
-            messages.append(ChatMessage(role="assistant", content=f"🛑 Step {step_count} Conclusion: {conclusion}"))
-            yield messages
-
-            if conclusion == 'STOP':
-                break
-
-        # Step 7: Generate Final Output (if needed)
-        if 'final' in self.output_types:
-            final_output = self.planner.generate_final_output(user_query, img_path, self.memory)
-            messages.append(ChatMessage(role="assistant", content=f"🎯 Final Output:\n{final_output}"))
-            yield messages
-
-        if 'direct' in self.output_types:
-            direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
-            messages.append(ChatMessage(role="assistant", content=f"🔹 Direct Output:\n{direct_output}"))
-            yield messages
-
-        # Step 8: Completion Message
-        messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process complete."))
-        yield messages
-
-def parse_arguments():
-    parser = argparse.ArgumentParser(description="Run the OpenTools demo with specified parameters.")
-    parser.add_argument("--llm_engine_name", default="gpt-4o", help="LLM engine name.")
-    parser.add_argument("--max_tokens", type=int, default=2000, help="Maximum tokens for LLM generation.")
-    parser.add_argument("--run_baseline_only", type=bool, default=False, help="Run only the baseline (no toolbox).")
-    parser.add_argument("--task", default="minitoolbench", help="Task to run.")
-    parser.add_argument("--task_description", default="", help="Task description.")
-    parser.add_argument(
-        "--output_types",
-        default="base,final,direct",
-        help="Comma-separated list of required outputs (base,final,direct)"
-    )
-    parser.add_argument("--enabled_tools", default="Generalist_Solution_Generator_Tool", help="List of enabled tools.")
-    parser.add_argument("--root_cache_dir", default="demo_solver_cache", help="Path to solver cache directory.")
-    parser.add_argument("--output_json_dir", default="demo_results", help="Path to output JSON directory.")
-    parser.add_argument("--max_steps", type=int, default=10, help="Maximum number of steps to execute.")
-    parser.add_argument("--max_time", type=int, default=60, help="Maximum time allowed in seconds.")
-    parser.add_argument("--verbose", type=bool, default=True, help="Enable verbose output.")
-    return parser.parse_args()
-
-
-def solve_problem_gradio(user_query, user_image):
-    """
-    Wrapper function to connect the solver to Gradio.
-    Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
-    """
-    global solver # Ensure we're using the globally defined solver
-
-    if solver is None:
-        return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]
-
-    messages = [] # Initialize message list
-    for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
-        yield [[msg.role, msg.content] for msg in message_batch] # Ensure correct format for Gradio Chatbot
-
-
-
-def main(args):
-    global solver
-    # Initialize Tools
-    enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
-
-
-    # Instantiate Initializer
-    initializer = Initializer(
-        enabled_tools=enabled_tools,
-        model_string=args.llm_engine_name
-    )
-
-    # Instantiate Planner
-    planner = Planner(
-        llm_engine_name=args.llm_engine_name,
-        toolbox_metadata=initializer.toolbox_metadata,
-        available_tools=initializer.available_tools
-    )
-
-    # Instantiate Memory
-    memory = Memory()
-
-    # Instantiate Executor
-    executor = Executor(
-        llm_engine_name=args.llm_engine_name,
-        root_cache_dir=args.root_cache_dir,
-        enable_signal=False
-    )
-
-    # Instantiate Solver
-    solver = Solver(
-        planner=planner,
-        memory=memory,
-        executor=executor,
-        task=args.task,
-        task_description=args.task_description,
-        output_types=args.output_types, # Add new parameter
-        verbose=args.verbose,
-        max_steps=args.max_steps,
-        max_time=args.max_time,
-        output_json_dir=args.output_json_dir,
-        root_cache_dir=args.root_cache_dir
-    )
-
-    # Test Inputs
-    # user_query = "How many balls are there in the image?"
-    # user_image_path = "/home/sheng/toolbox-agent/mathvista_113.png" # Replace with your actual image path
-
-    # # Load the image as a PIL object
-    # user_image = Image.open(user_image_path).convert("RGB") # Ensure it's in RGB mode
-
-    # print("\n=== Starting Problem Solving ===\n")
-    # messages = []
-    # for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
-    #     for message in message_batch:
-    #         print(f"{message.role}: {message.content}")
-
-    # messages = []
-    # solver.stream_solve_user_problem(user_query, user_image, messages)
-
-
-    # def solve_problem_stream(user_query, user_image):
-    #     messages = [] # Ensure it's a list of [role, content] pairs
-
-    #     for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
-    #         yield message_batch # Stream messages correctly in tuple format
-
-    # solve_problem_stream(user_query, user_image)
-
-    # ========== Gradio Interface ==========
-    with gr.Blocks() as demo:
-        gr.Markdown("# 🧠 OctoTools AI Solver") # Title
-
-        with gr.Row():
-            user_query = gr.Textbox(label="Enter your query", placeholder="Type your question here...")
-            user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats
-
-        run_button = gr.Button("Run") # Run button
-        chatbot_output = gr.Chatbot(label="Problem-Solving Output")
-
-        # Link button click to function
-        run_button.click(fn=solve_problem_gradio, inputs=[user_query, user_image], outputs=chatbot_output)
-
-    # Launch the Gradio app
-    demo.launch()
-
-
-
-if __name__ == "__main__":
-    args = parse_arguments()
-    main(args)
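
Editor's note: the UI pattern this deleted file was built around (a generator that keeps appending to a running message list and re-yields it, so the Gradio Chatbot re-renders after every step) can be isolated in a few lines. The sketch below is for illustration only: solve_steps and its three-step loop are invented, not code from this commit, and it assumes Gradio's tuple-format Chatbot, where each history entry is a [user, assistant] pair.

# Minimal sketch of app_v1's streaming pattern (illustrative names, not commit code).
import time
import gradio as gr

def solve_steps(query):
    # Grow one [user, assistant] pair; each yield re-renders the Chatbot.
    reply = ""
    for step in range(1, 4):
        reply += f"🔄 Step {step}: working on '{query}'\n"
        time.sleep(0.2)  # stand-in for planner/executor latency
        yield [[query, reply]]

with gr.Blocks() as demo:
    query_box = gr.Textbox(label="Enter your query")
    chat = gr.Chatbot(label="Problem-Solving Output")
    gr.Button("Run").click(fn=solve_steps, inputs=query_box, outputs=chat)

if __name__ == "__main__":
    demo.launch()

Because the button is wired to a generator, Gradio treats each yield as a fresh value for the output component, which is what produces the step-by-step streaming effect in app_v1_0215.py.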
app_v2_0216.py DELETED
@@ -1,371 +0,0 @@
-import os
-import sys
-import json
-import argparse
-import time
-import io
-import uuid
-from PIL import Image
-from typing import List, Dict, Any, Iterator
-
-import gradio as gr
-from gradio import ChatMessage
-
-# Add the project root to the Python path
-current_dir = os.path.dirname(os.path.abspath(__file__))
-project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
-sys.path.insert(0, project_root)
-
-from octotools.models.initializer import Initializer
-from octotools.models.planner import Planner
-from octotools.models.memory import Memory
-from octotools.models.executor import Executor
-from octotools.models.utils import make_json_serializable
-
-
-class Solver:
-    def __init__(
-        self,
-        planner,
-        memory,
-        executor,
-        task: str,
-        task_description: str,
-        output_types: str = "base,final,direct",
-        index: int = 0,
-        verbose: bool = True,
-        max_steps: int = 10,
-        max_time: int = 60,
-        output_json_dir: str = "results",
-        root_cache_dir: str = "cache"
-    ):
-        self.planner = planner
-        self.memory = memory
-        self.executor = executor
-        self.task = task
-        self.task_description = task_description
-        self.index = index
-        self.verbose = verbose
-        self.max_steps = max_steps
-        self.max_time = max_time
-        self.output_json_dir = output_json_dir
-        self.root_cache_dir = root_cache_dir
-
-        self.output_types = output_types.lower().split(',')
-        assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."
-
-
-    def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, api_key: str, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
-        """
-        Streams intermediate thoughts and final responses for the problem-solving process based on user input.
-
-        Args:
-            user_query (str): The text query input from the user.
-            user_image (Image.Image): The uploaded image from the user (PIL Image object).
-            messages (list): A list of ChatMessage objects to store the streamed responses.
-        """
-
-        if user_image:
-            # # Convert PIL Image to bytes (for processing)
-            # img_bytes_io = io.BytesIO()
-            # user_image.save(img_bytes_io, format="PNG") # Convert image to PNG bytes
-            # img_bytes = img_bytes_io.getvalue() # Get bytes
-
-            # Use image paths instead of bytes,
-            os.makedirs(os.path.join(self.root_cache_dir, 'images'), exist_ok=True)
-            img_path = os.path.join(self.root_cache_dir, 'images', str(uuid.uuid4()) + '.jpg')
-            user_image.save(img_path)
-        else:
-            img_path = None
-
-        # Set query cache
-        _cache_dir = os.path.join(self.root_cache_dir)
-        self.executor.set_query_cache_dir(_cache_dir)
-
-        # Step 1: Display the received inputs
-        if user_image:
-            messages.append(ChatMessage(role="assistant", content=f"📝 Received Query: {user_query}\n🖼️ Image Uploaded"))
-        else:
-            messages.append(ChatMessage(role="assistant", content=f"📝 Received Query: {user_query}"))
-        yield messages
-
-        # # Step 2: Add "thinking" status while processing
-        # messages.append(ChatMessage(
-        #     role="assistant",
-        #     content="",
-        #     metadata={"title": "⏳ Thinking: Processing input..."}
-        # ))
-
-        # Step 3: Initialize problem-solving state
-        start_time = time.time()
-        step_count = 0
-        json_data = {"query": user_query, "image": "Image received as bytes"}
-
-        # Step 4: Query Analysis
-        query_analysis = self.planner.analyze_query(user_query, img_path)
-        json_data["query_analysis"] = query_analysis
-        messages.append(ChatMessage(role="assistant",
-                                    content=f"{query_analysis}",
-                                    metadata={"title": "🔍 Query Analysis"}))
-        yield messages
-
-        # Step 5: Execution loop (similar to your step-by-step solver)
-        while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
-            step_count += 1
-            # messages.append(ChatMessage(role="assistant",
-            #                             content=f"Generating next step...",
-            #                             metadata={"title": f"🔄 Step {step_count}"}))
-            yield messages
-
-            # Generate the next step
-            next_step = self.planner.generate_next_step(
-                user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
-            )
-            context, sub_goal, tool_name = self.planner.extract_context_subgoal_and_tool(next_step)
-
-            # Display the step information
-            messages.append(ChatMessage(
-                role="assistant",
-                content=f"- Context: {context}\n- Sub-goal: {sub_goal}\n- Tool: {tool_name}",
-                metadata={"title": f"📌 Step {step_count}: {tool_name}"}
-            ))
-            yield messages
-
-            # Handle tool execution or errors
-            if tool_name not in self.planner.available_tools:
-                messages.append(ChatMessage(
-                    role="assistant",
-                    content=f"⚠️ Error: Tool '{tool_name}' is not available."))
-                yield messages
-                continue
-
-            # Execute the tool command
-            tool_command = self.executor.generate_tool_command(
-                user_query, img_path, context, sub_goal, tool_name, self.planner.toolbox_metadata[tool_name]
-            )
-            explanation, command = self.executor.extract_explanation_and_command(tool_command)
-            result = self.executor.execute_tool_command(tool_name, command)
-            result = make_json_serializable(result)
-
-            messages.append(ChatMessage(
-                role="assistant",
-                content=f"{json.dumps(result, indent=4)}",
-                metadata={"title": f"✅ Step {step_count} Result: {tool_name}"}))
-            yield messages
-
-            # Step 6: Memory update and stopping condition
-            self.memory.add_action(step_count, tool_name, sub_goal, tool_command, result)
-            stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
-            conclusion = self.planner.extract_conclusion(stop_verification)
-
-            messages.append(ChatMessage(
-                role="assistant",
-                content=f"🛑 Step {step_count} Conclusion: {conclusion}"))
-            yield messages
-
-            if conclusion == 'STOP':
-                break
-
-        # Step 7: Generate Final Output (if needed)
-        if 'final' in self.output_types:
-            final_output = self.planner.generate_final_output(user_query, img_path, self.memory)
-            messages.append(ChatMessage(role="assistant", content=f"🎯 Final Output:\n{final_output}"))
-            yield messages
-
-        if 'direct' in self.output_types:
-            direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
-            messages.append(ChatMessage(role="assistant", content=f"🔹 Direct Output:\n{direct_output}"))
-            yield messages
-
-        # Step 8: Completion Message
-        messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process complete."))
-        yield messages
-
-
-def parse_arguments():
-    parser = argparse.ArgumentParser(description="Run the OctoTools demo with specified parameters.")
-    parser.add_argument("--llm_engine_name", default="gpt-4o", help="LLM engine name.")
-    parser.add_argument("--max_tokens", type=int, default=2000, help="Maximum tokens for LLM generation.")
-    parser.add_argument("--run_baseline_only", type=bool, default=False, help="Run only the baseline (no toolbox).")
-    parser.add_argument("--task", default="minitoolbench", help="Task to run.")
-    parser.add_argument("--task_description", default="", help="Task description.")
-    parser.add_argument(
-        "--output_types",
-        default="base,final,direct",
-        help="Comma-separated list of required outputs (base,final,direct)"
-    )
-    parser.add_argument("--enabled_tools", default="Generalist_Solution_Generator_Tool", help="List of enabled tools.")
-    parser.add_argument("--root_cache_dir", default="demo_solver_cache", help="Path to solver cache directory.")
-    parser.add_argument("--output_json_dir", default="demo_results", help="Path to output JSON directory.")
-    parser.add_argument("--verbose", type=bool, default=True, help="Enable verbose output.")
-    return parser.parse_args()
-
-
-def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_key=None, llm_model_engine=None, enabled_tools=None):
-    """
-    Wrapper function to connect the solver to Gradio.
-    Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
-    """
-
-    if api_key is None:
-        return [["assistant", "⚠️ Error: OpenAI API Key is required."]]
-
-    # Initialize Tools
-    enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
-
-    # Hack enabled_tools
-    enabled_tools = ["Generalist_Solution_Generator_Tool"]
-    # Instantiate Initializer
-    initializer = Initializer(
-        enabled_tools=enabled_tools,
-        model_string=llm_model_engine,
-        api_key=api_key
-    )
-
-    # Instantiate Planner
-    planner = Planner(
-        llm_engine_name=llm_model_engine,
-        toolbox_metadata=initializer.toolbox_metadata,
-        available_tools=initializer.available_tools,
-        api_key=api_key
-    )
-
-    # Instantiate Memory
-    memory = Memory()
-
-    # Instantiate Executor
-    executor = Executor(
-        llm_engine_name=llm_model_engine,
-        root_cache_dir=args.root_cache_dir,
-        enable_signal=False,
-        api_key=api_key
-    )
-
-    # Instantiate Solver
-    solver = Solver(
-        planner=planner,
-        memory=memory,
-        executor=executor,
-        task=args.task,
-        task_description=args.task_description,
-        output_types=args.output_types, # Add new parameter
-        verbose=args.verbose,
-        max_steps=max_steps,
-        max_time=max_time,
-        output_json_dir=args.output_json_dir,
-        root_cache_dir=args.root_cache_dir
-    )
-
-    if solver is None:
-        return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]
-
-    messages = [] # Initialize message list
-    for message_batch in solver.stream_solve_user_problem(user_query, user_image, api_key, messages):
-        yield [msg for msg in message_batch] # Ensure correct format for Gradio Chatbot
-
-
-
-def main(args):
-    #################### Gradio Interface ####################
-    with gr.Blocks() as demo:
-        gr.Markdown("# 🧠 The OctoTools Agentic Solver") # Title
-
-        with gr.Row():
-            with gr.Column(scale=2):
-                api_key = gr.Textbox(show_label=False, placeholder="Your API key will not be stored in any way.", type="password", container=False)
-                user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats
-
-                with gr.Row():
-                    with gr.Column(scale=8):
-                        user_query = gr.Textbox(show_label=False, placeholder="Type your question here...", container=False)
-                    with gr.Column(scale=1):
-                        run_button = gr.Button("Run") # Run button
-
-                max_steps = gr.Slider(value=5, minimum=1, maximum=10, step=1, label="Max Steps")
-                max_time = gr.Slider(value=150, minimum=60, maximum=300, step=30, label="Max Time (seconds)")
-                llm_model_engine = gr.Dropdown(
-                    choices=["gpt-4o", "gpt-4o-2024-11-20", "gpt-4o-2024-08-06", "gpt-4o-2024-05-13",
-                             "gpt-4o-mini", "gpt-4o-mini-2024-07-18"],
-                    value="gpt-4o",
-                    label="LLM Model"
-                )
-                enabled_tools = gr.CheckboxGroup(
-                    choices=all_tools,
-                    value=all_tools,
-                    label="Enabled Tools"
-                )
-
-            with gr.Column(scale=2):
-                api_key = gr.Textbox(show_label=False, placeholder="Your API key will not be stored in any way.", type="password", container=False)
-                user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats
-
-                with gr.Row():
-                    with gr.Column(scale=8):
-                        user_query = gr.Textbox(show_label=False, placeholder="Type your question here...", container=False)
-                    with gr.Column(scale=1):
-                        run_button = gr.Button("Run") # Run button
-
-                max_steps = gr.Slider(value=5, minimum=1, maximum=10, step=1, label="Max Steps")
-                max_time = gr.Slider(value=150, minimum=60, maximum=300, step=30, label="Max Time (seconds)")
-                llm_model_engine = gr.Dropdown(
-                    choices=["gpt-4o", "gpt-4o-2024-11-20", "gpt-4o-2024-08-06", "gpt-4o-2024-05-13",
-                             "gpt-4o-mini", "gpt-4o-mini-2024-07-18"],
-                    value="gpt-4o",
-                    label="LLM Model"
-                )
-                enabled_tools = gr.CheckboxGroup(
-                    choices=all_tools,
-                    value=all_tools,
-                    label="Enabled Tools"
-                )
-
-
-            with gr.Column(scale=2):
-                chatbot_output = gr.Chatbot(type="messages", label="Problem-Solving Output")
-                # chatbot_output.like(lambda x: print(f"User liked: {x}"))
-
-                with gr.Row(elem_id="buttons") as button_row:
-                    upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
-                    downvote_btn = gr.Button(value="👎 Downvote", interactive=False)
-                    clear_btn = gr.Button(value="🗑️ Clear history", interactive=False)
-
-        # Link button click to function
-        run_button.click(
-            fn=solve_problem_gradio,
-            inputs=[user_query, user_image, max_steps, max_time, api_key, llm_model_engine, enabled_tools],
-            outputs=chatbot_output
-        )
-    #################### Gradio Interface ####################
-
-    # Launch the Gradio app
-    demo.launch()
-
-
-if __name__ == "__main__":
-    args = parse_arguments()
-
-    # Manually set enabled tools
-    # args.enabled_tools = "Generalist_Solution_Generator_Tool"
-
-    # All tools
-    all_tools = [
-        "Generalist_Solution_Generator_Tool",
-
-        "Image_Captioner_Tool",
-        "Object_Detector_Tool",
-        "Text_Detector_Tool",
-        "Relevant_Patch_Zoomer_Tool",
-
-        "Python_Code_Generator_Tool",
-
-        "ArXiv_Paper_Searcher_Tool",
-        "Google_Search_Tool",
-        "Nature_News_Fetcher_Tool",
-        "Pubmed_Search_Tool",
-        "URL_Text_Extractor_Tool",
-        "Wikipedia_Knowledge_Searcher_Tool"
-    ]
-    args.enabled_tools = ",".join(all_tools)
-
-    main(args)
-
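
Editor's note: the main UI change from v1 to this second deleted app is dropping the hand-rolled ChatMessage class in favor of gradio's own ChatMessage, paired with gr.Chatbot(type="messages"), so entries carrying metadata={"title": ...} render as collapsible step boxes instead of plain chat bubbles. A minimal sketch of that pattern, for illustration only (show_thought and its canned content are invented; it assumes a Gradio release that ships gradio.ChatMessage and the messages-format Chatbot):

# Minimal sketch of app_v2's collapsible "thought" messages (illustrative names).
import gradio as gr
from gradio import ChatMessage

def show_thought(query):
    # A titled metadata message renders as a collapsible box, which is how
    # v2 displays query analysis and per-step tool results.
    messages = [ChatMessage(role="assistant",
                            content=f"Analyzing: {query}",
                            metadata={"title": "🔍 Query Analysis"})]
    yield messages
    messages.append(ChatMessage(role="assistant", content="🎯 Final Output: done."))
    yield messages

with gr.Blocks() as demo:
    query_box = gr.Textbox(label="Query")
    chat = gr.Chatbot(type="messages", label="Output")
    gr.Button("Run").click(fn=show_thought, inputs=query_box, outputs=chat)

if __name__ == "__main__":
    demo.launch()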