lupantech committed on
Commit
9ab860d
·
1 Parent(s): a181016

polished ui

Browse files
app.py CHANGED
@@ -201,7 +201,7 @@ def parse_arguments():
201
  return parser.parse_args()
202
 
203
 
204
- def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_key=None):
205
  """
206
  Wrapper function to connect the solver to Gradio.
207
  Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
@@ -213,16 +213,18 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
213
  # Initialize Tools
214
  enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
215
 
 
 
216
  # Instantiate Initializer
217
  initializer = Initializer(
218
  enabled_tools=enabled_tools,
219
- model_string=args.llm_engine_name,
220
  api_key=api_key
221
  )
222
 
223
  # Instantiate Planner
224
  planner = Planner(
225
- llm_engine_name=args.llm_engine_name,
226
  toolbox_metadata=initializer.toolbox_metadata,
227
  available_tools=initializer.available_tools,
228
  api_key=api_key
@@ -233,7 +235,7 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
233
 
234
  # Instantiate Executor
235
  executor = Executor(
236
- llm_engine_name=args.llm_engine_name,
237
  root_cache_dir=args.root_cache_dir,
238
  enable_signal=False,
239
  api_key=api_key
@@ -262,33 +264,81 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
262
  yield [msg for msg in message_batch] # Ensure correct format for Gradio Chatbot
263
 
264
 
265
-
266
  def main(args):
267
  #################### Gradio Interface ####################
268
  with gr.Blocks() as demo:
269
- gr.Markdown("# 🧠 The OctoTools Agentic Solver") # Title
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
  with gr.Row():
272
  with gr.Column(scale=1):
273
- api_key = gr.Textbox(show_label=False, placeholder="Your API key will not be stored in any way.", type="password", container=False)
274
- user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats
275
- max_steps = gr.Slider(value=5, minimum=1, maximum=10, step=1, label="Max Steps")
276
- max_time = gr.Slider(value=150, minimum=60, maximum=300, step=30, label="Max Time (seconds)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  with gr.Column(scale=3):
278
- chatbot_output = gr.Chatbot(type="messages", label="Problem-Solving Output")
279
  # chatbot_output.like(lambda x: print(f"User liked: {x}"))
280
- with gr.Row():
281
- with gr.Column(scale=8):
282
- user_query = gr.Textbox(show_label=False, placeholder="Type your question here...", container=False)
283
- with gr.Column(scale=1):
284
- run_button = gr.Button("Run") # Run button
285
  with gr.Row(elem_id="buttons") as button_row:
286
- upvote_btn = gr.Button(value="πŸ‘ Upvote", interactive=False)
287
- downvote_btn = gr.Button(value="πŸ‘Ž Downvote", interactive=False)
288
- clear_btn = gr.Button(value="πŸ—‘οΈ Clear history", interactive=False)
 
 
 
 
 
289
 
290
  # Link button click to function
291
- run_button.click(fn=solve_problem_gradio, inputs=[user_query, user_image, max_steps, max_time, api_key], outputs=chatbot_output)
 
 
 
 
292
  #################### Gradio Interface ####################
293
 
294
  # Launch the Gradio app
@@ -301,7 +351,6 @@ if __name__ == "__main__":
301
  # Manually set enabled tools
302
  # args.enabled_tools = "Generalist_Solution_Generator_Tool"
303
 
304
-
305
  # All tools
306
  all_tools = [
307
  "Generalist_Solution_Generator_Tool",
 
201
  return parser.parse_args()
202
 
203
 
204
+ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_key=None, llm_model_engine=None, enabled_tools=None):
205
  """
206
  Wrapper function to connect the solver to Gradio.
207
  Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
 
213
  # Initialize Tools
214
  enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
215
 
216
+ # Hack enabled_tools
217
+ enabled_tools = ["Generalist_Solution_Generator_Tool"]
218
  # Instantiate Initializer
219
  initializer = Initializer(
220
  enabled_tools=enabled_tools,
221
+ model_string=llm_model_engine,
222
  api_key=api_key
223
  )
224
 
225
  # Instantiate Planner
226
  planner = Planner(
227
+ llm_engine_name=llm_model_engine,
228
  toolbox_metadata=initializer.toolbox_metadata,
229
  available_tools=initializer.available_tools,
230
  api_key=api_key
 
235
 
236
  # Instantiate Executor
237
  executor = Executor(
238
+ llm_engine_name=llm_model_engine,
239
  root_cache_dir=args.root_cache_dir,
240
  enable_signal=False,
241
  api_key=api_key
 
264
  yield [msg for msg in message_batch] # Ensure correct format for Gradio Chatbot
265
 
266
 
 
267
  def main(args):
268
  #################### Gradio Interface ####################
269
  with gr.Blocks() as demo:
270
+ gr.Markdown("# πŸ™ Chat with OctoTools: An Agentic Framework for Complex Reasoning") # Title
271
+ # gr.Markdown("[![OctoTools](https://img.shields.io/badge/OctoTools-Agentic%20Framework%20for%20Complex%20Reasoning-blue)](https://octotools.github.io/)") # Title
272
+ gr.Markdown("""
273
+ **OctoTools** is a training-free, user-friendly, and easily extensible open-source agentic framework designed to tackle complex reasoning across diverse domains.
274
+ It introduces standardized **tool cards** to encapsulate tool functionality, a **planner** for both high-level and low-level planning, and an **executor** to carry out tool usage.
275
+
276
+ [Website](https://octotools.github.io/) |
277
+ [Github](https://github.com/octotools/octotools) |
278
+ [arXiv](https://github.com/octotools/octotools/assets/paper.pdf) |
279
+ [Paper](https://github.com/octotools/octotools/assets/paper.pdf) |
280
+ [Tool Cards](https://octotools.github.io/#tool-cards) |
281
+ [Example Visualizations](https://octotools.github.io/#visualization)
282
+ """)
283
 
284
  with gr.Row():
285
  with gr.Column(scale=1):
286
+ with gr.Row():
287
+ api_key = gr.Textbox(
288
+ show_label=True,
289
+ placeholder="Your API key will not be stored in any way.",
290
+ type="password",
291
+ label="OpenAI API Key",
292
+ # container=False
293
+ )
294
+
295
+ llm_model_engine = gr.Dropdown(
296
+ choices=["gpt-4o", "gpt-4o-2024-11-20", "gpt-4o-2024-08-06", "gpt-4o-2024-05-13",
297
+ "gpt-4o-mini", "gpt-4o-mini-2024-07-18"],
298
+ value="gpt-4o",
299
+ label="LLM Model"
300
+ )
301
+ with gr.Row():
302
+ max_steps = gr.Slider(value=5, minimum=1, maximum=10, step=1, label="Max Steps")
303
+ max_time = gr.Slider(value=180, minimum=60, maximum=300, step=30, label="Max Time (seconds)")
304
+
305
+ with gr.Row():
306
+ enabled_tools = gr.CheckboxGroup(
307
+ choices=all_tools,
308
+ value=all_tools,
309
+ label="Enabled Tools",
310
+ )
311
+
312
+ with gr.Column(scale=2):
313
+ user_image = gr.Image(type="pil", label="Upload an image (optional)", height=500) # Accepts multiple formats
314
+
315
+ with gr.Row():
316
+ user_query = gr.Textbox( placeholder="Type your question here...", label="Query")
317
+
318
+ with gr.Row():
319
+ run_button = gr.Button("Run") # Run button
320
+
321
  with gr.Column(scale=3):
322
+ chatbot_output = gr.Chatbot(type="messages", label="Step-wise problem-solving output (Deep Thinking)", height=500)
323
  # chatbot_output.like(lambda x: print(f"User liked: {x}"))
324
+
325
+ # TODO: Add actions to the buttons
 
 
 
326
  with gr.Row(elem_id="buttons") as button_row:
327
+ upvote_btn = gr.Button(value="πŸ‘ Upvote", interactive=True)
328
+ downvote_btn = gr.Button(value="πŸ‘Ž Downvote", interactive=True)
329
+ clear_btn = gr.Button(value="πŸ—‘οΈ Clear history", interactive=True)
330
+
331
+ with gr.Row():
332
+ comment_textbox = gr.Textbox(value="",
333
+ placeholder="Feel free to add any comments here. Thanks for using OctoTools!",
334
+ label="πŸ’¬ Comment", interactive=True)
335
 
336
  # Link button click to function
337
+ run_button.click(
338
+ fn=solve_problem_gradio,
339
+ inputs=[user_query, user_image, max_steps, max_time, api_key, llm_model_engine, enabled_tools],
340
+ outputs=chatbot_output
341
+ )
342
  #################### Gradio Interface ####################
343
 
344
  # Launch the Gradio app
 
351
  # Manually set enabled tools
352
  # args.enabled_tools = "Generalist_Solution_Generator_Tool"
353
 
 
354
  # All tools
355
  all_tools = [
356
  "Generalist_Solution_Generator_Tool",
app_bak_0215.py → app_v1_0215.py RENAMED
File without changes
app_v2_0216.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import argparse
5
+ import time
6
+ import io
7
+ import uuid
8
+ from PIL import Image
9
+ from typing import List, Dict, Any, Iterator
10
+
11
+ import gradio as gr
12
+ from gradio import ChatMessage
13
+
14
+ # Add the project root to the Python path
15
+ current_dir = os.path.dirname(os.path.abspath(__file__))
16
+ project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
17
+ sys.path.insert(0, project_root)
18
+
19
+ from octotools.models.initializer import Initializer
20
+ from octotools.models.planner import Planner
21
+ from octotools.models.memory import Memory
22
+ from octotools.models.executor import Executor
23
+ from octotools.models.utils import make_json_serializable
24
+
25
+
26
class Solver:
    """
    Drives one user task through the planner / memory / executor loop and
    streams intermediate results to the UI as batches of `ChatMessage`s.
    """

    def __init__(
        self,
        planner,
        memory,
        executor,
        task: str,
        task_description: str,
        output_types: str = "base,final,direct",
        index: int = 0,
        verbose: bool = True,
        max_steps: int = 10,
        max_time: int = 60,
        output_json_dir: str = "results",
        root_cache_dir: str = "cache"
    ):
        # Core collaborators are project objects (see octotools.models.*).
        self.planner = planner
        self.memory = memory
        self.executor = executor
        self.task = task
        self.task_description = task_description
        self.index = index
        self.verbose = verbose
        self.max_steps = max_steps          # hard cap on solver iterations
        self.max_time = max_time            # hard cap (seconds) on wall-clock time
        self.output_json_dir = output_json_dir
        self.root_cache_dir = root_cache_dir

        # Which outputs to produce at the end; a comma-separated subset of
        # "base", "final", "direct".
        self.output_types = output_types.lower().split(',')
        assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."


    def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, api_key: str, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
        """
        Streams intermediate thoughts and final responses for the problem-solving process based on user input.

        Args:
            user_query (str): The text query input from the user.
            user_image (Image.Image): The uploaded image from the user (PIL Image object), or None.
            api_key (str): OpenAI API key. NOTE(review): accepted but not read in this
                method body — presumably consumed by the planner/executor set up by the
                caller; confirm before removing.
            messages (list): A list of ChatMessage objects to store the streamed responses.
                Mutated in place; the same (growing) list is yielded after each update.

        Yields:
            The `messages` list after each append, so the UI can re-render incrementally.
        """

        if user_image:
            # # Convert PIL Image to bytes (for processing)
            # img_bytes_io = io.BytesIO()
            # user_image.save(img_bytes_io, format="PNG") # Convert image to PNG bytes
            # img_bytes = img_bytes_io.getvalue() # Get bytes

            # Use image paths instead of bytes,
            # persisting the upload under a unique name so downstream tools
            # can re-open it from disk.
            os.makedirs(os.path.join(self.root_cache_dir, 'images'), exist_ok=True)
            img_path = os.path.join(self.root_cache_dir, 'images', str(uuid.uuid4()) + '.jpg')
            user_image.save(img_path)
        else:
            img_path = None

        # Set query cache
        _cache_dir = os.path.join(self.root_cache_dir)
        self.executor.set_query_cache_dir(_cache_dir)

        # Step 1: Display the received inputs
        if user_image:
            messages.append(ChatMessage(role="assistant", content=f"πŸ“ Received Query: {user_query}\nπŸ–ΌοΈ Image Uploaded"))
        else:
            messages.append(ChatMessage(role="assistant", content=f"πŸ“ Received Query: {user_query}"))
        yield messages

        # # Step 2: Add "thinking" status while processing
        # messages.append(ChatMessage(
        #     role="assistant",
        #     content="",
        #     metadata={"title": "⏳ Thinking: Processing input..."}
        # ))

        # Step 3: Initialize problem-solving state
        start_time = time.time()
        step_count = 0
        json_data = {"query": user_query, "image": "Image received as bytes"}

        # Step 4: Query Analysis
        query_analysis = self.planner.analyze_query(user_query, img_path)
        json_data["query_analysis"] = query_analysis
        messages.append(ChatMessage(role="assistant",
                                    content=f"{query_analysis}",
                                    metadata={"title": "πŸ” Query Analysis"}))
        yield messages

        # Step 5: Execution loop (similar to your step-by-step solver)
        # Each iteration: plan next step -> run a tool -> record -> check stop.
        while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
            step_count += 1
            # messages.append(ChatMessage(role="assistant",
            #                             content=f"Generating next step...",
            #                             metadata={"title": f"πŸ”„ Step {step_count}"}))
            yield messages

            # Generate the next step
            next_step = self.planner.generate_next_step(
                user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
            )
            context, sub_goal, tool_name = self.planner.extract_context_subgoal_and_tool(next_step)

            # Display the step information
            messages.append(ChatMessage(
                role="assistant",
                content=f"- Context: {context}\n- Sub-goal: {sub_goal}\n- Tool: {tool_name}",
                metadata={"title": f"πŸ“Œ Step {step_count}: {tool_name}"}
            ))
            yield messages

            # Handle tool execution or errors
            # An unknown tool skips this step (still counts toward max_steps).
            if tool_name not in self.planner.available_tools:
                messages.append(ChatMessage(
                    role="assistant",
                    content=f"⚠️ Error: Tool '{tool_name}' is not available."))
                yield messages
                continue

            # Execute the tool command
            tool_command = self.executor.generate_tool_command(
                user_query, img_path, context, sub_goal, tool_name, self.planner.toolbox_metadata[tool_name]
            )
            explanation, command = self.executor.extract_explanation_and_command(tool_command)
            result = self.executor.execute_tool_command(tool_name, command)
            result = make_json_serializable(result)

            messages.append(ChatMessage(
                role="assistant",
                content=f"{json.dumps(result, indent=4)}",
                metadata={"title": f"βœ… Step {step_count} Result: {tool_name}"}))
            yield messages

            # Step 6: Memory update and stopping condition
            self.memory.add_action(step_count, tool_name, sub_goal, tool_command, result)
            stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
            conclusion = self.planner.extract_conclusion(stop_verification)

            messages.append(ChatMessage(
                role="assistant",
                content=f"πŸ›‘ Step {step_count} Conclusion: {conclusion}"))
            yield messages

            if conclusion == 'STOP':
                break

        # Step 7: Generate Final Output (if needed)
        if 'final' in self.output_types:
            final_output = self.planner.generate_final_output(user_query, img_path, self.memory)
            messages.append(ChatMessage(role="assistant", content=f"🎯 Final Output:\n{final_output}"))
            yield messages

        if 'direct' in self.output_types:
            direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
            messages.append(ChatMessage(role="assistant", content=f"πŸ”Ή Direct Output:\n{direct_output}"))
            yield messages

        # Step 8: Completion Message
        messages.append(ChatMessage(role="assistant", content="βœ… Problem-solving process complete."))
        yield messages
183
+
184
+
185
def _str2bool(value):
    """Parse a CLI string into a real boolean for argparse `type=`.

    Plain `type=bool` is a footgun: any non-empty string (including the
    literal "False") is truthy, so `--verbose False` used to yield True.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"Boolean value expected, got {value!r}")


def parse_arguments():
    """Build and parse the command-line arguments for the OctoTools demo.

    Returns:
        argparse.Namespace with the demo's configuration (LLM engine, task,
        output types, tool list, cache/output directories, verbosity).
    """
    parser = argparse.ArgumentParser(description="Run the OctoTools demo with specified parameters.")
    parser.add_argument("--llm_engine_name", default="gpt-4o", help="LLM engine name.")
    parser.add_argument("--max_tokens", type=int, default=2000, help="Maximum tokens for LLM generation.")
    # _str2bool (not bool) so "--run_baseline_only False" actually parses as False.
    parser.add_argument("--run_baseline_only", type=_str2bool, default=False, help="Run only the baseline (no toolbox).")
    parser.add_argument("--task", default="minitoolbench", help="Task to run.")
    parser.add_argument("--task_description", default="", help="Task description.")
    parser.add_argument(
        "--output_types",
        default="base,final,direct",
        help="Comma-separated list of required outputs (base,final,direct)"
    )
    parser.add_argument("--enabled_tools", default="Generalist_Solution_Generator_Tool", help="List of enabled tools.")
    parser.add_argument("--root_cache_dir", default="demo_solver_cache", help="Path to solver cache directory.")
    parser.add_argument("--output_json_dir", default="demo_results", help="Path to output JSON directory.")
    parser.add_argument("--verbose", type=_str2bool, default=True, help="Enable verbose output.")
    return parser.parse_args()
202
+
203
+
204
def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_key=None, llm_model_engine=None, enabled_tools=None):
    """
    Wrapper function to connect the solver to Gradio.
    Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.

    Args:
        user_query: Text query typed by the user.
        user_image: Optional PIL image uploaded by the user.
        max_steps: Upper bound on solver iterations.
        max_time: Upper bound (seconds) on total solving time.
        api_key: OpenAI API key entered in the UI.
        llm_model_engine: Model name selected in the UI dropdown.
        enabled_tools: Tool names selected in the UI (currently overridden below).

    Yields:
        Lists of chat messages in the Gradio `type="messages"` format.
    """
    # This function is a generator (it yields), so a plain `return value` is
    # swallowed by Gradio and never displayed. Errors must be *yielded*, and in
    # the dict format the `type="messages"` Chatbot expects (the old
    # `[["assistant", ...]]` pair format matched the legacy tuple mode).
    if not api_key:
        yield [{"role": "assistant", "content": "⚠️ Error: OpenAI API Key is required."}]
        return

    # Initialize Tools
    enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []

    # HACK: force the generalist tool regardless of the UI selection.
    # TODO: honor the `enabled_tools` checkbox group once all tools are deployable.
    enabled_tools = ["Generalist_Solution_Generator_Tool"]

    # Instantiate Initializer
    initializer = Initializer(
        enabled_tools=enabled_tools,
        model_string=llm_model_engine,
        api_key=api_key
    )

    # Instantiate Planner
    planner = Planner(
        llm_engine_name=llm_model_engine,
        toolbox_metadata=initializer.toolbox_metadata,
        available_tools=initializer.available_tools,
        api_key=api_key
    )

    # Instantiate Memory
    memory = Memory()

    # Instantiate Executor
    executor = Executor(
        llm_engine_name=llm_model_engine,
        root_cache_dir=args.root_cache_dir,
        enable_signal=False,
        api_key=api_key
    )

    # Instantiate Solver
    solver = Solver(
        planner=planner,
        memory=memory,
        executor=executor,
        task=args.task,
        task_description=args.task_description,
        output_types=args.output_types,  # Add new parameter
        verbose=args.verbose,
        max_steps=max_steps,
        max_time=max_time,
        output_json_dir=args.output_json_dir,
        root_cache_dir=args.root_cache_dir
    )
    # (The old `if solver is None` check was dead code: the constructor above
    # either returns an instance or raises.)

    messages = []  # Initialize message list
    for message_batch in solver.stream_solve_user_problem(user_query, user_image, api_key, messages):
        yield [msg for msg in message_batch]  # Ensure correct format for Gradio Chatbot
265
+
266
+
267
+
268
def main(args):
    """Build and launch the Gradio demo UI.

    Reads the module-level `all_tools` list (set in the __main__ block) for the
    tool checkbox group, and wires the Run button to `solve_problem_gradio`.
    """
    #################### Gradio Interface ####################
    with gr.Blocks() as demo:
        gr.Markdown("# 🧠 The OctoTools Agentic Solver") # Title

        with gr.Row():
            # Left column: user inputs and solver settings.
            # NOTE: the previous version created this entire column twice
            # back-to-back; the second copy shadowed the first's variables and
            # left a set of dead duplicate widgets in the UI. Deduplicated here.
            with gr.Column(scale=2):
                api_key = gr.Textbox(show_label=False, placeholder="Your API key will not be stored in any way.", type="password", container=False)
                user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats

                with gr.Row():
                    with gr.Column(scale=8):
                        user_query = gr.Textbox(show_label=False, placeholder="Type your question here...", container=False)
                    with gr.Column(scale=1):
                        run_button = gr.Button("Run") # Run button

                max_steps = gr.Slider(value=5, minimum=1, maximum=10, step=1, label="Max Steps")
                max_time = gr.Slider(value=150, minimum=60, maximum=300, step=30, label="Max Time (seconds)")
                llm_model_engine = gr.Dropdown(
                    choices=["gpt-4o", "gpt-4o-2024-11-20", "gpt-4o-2024-08-06", "gpt-4o-2024-05-13",
                             "gpt-4o-mini", "gpt-4o-mini-2024-07-18"],
                    value="gpt-4o",
                    label="LLM Model"
                )
                enabled_tools = gr.CheckboxGroup(
                    choices=all_tools,
                    value=all_tools,
                    label="Enabled Tools"
                )

            # Right column: streamed solver output and feedback buttons.
            with gr.Column(scale=2):
                chatbot_output = gr.Chatbot(type="messages", label="Problem-Solving Output")
                # chatbot_output.like(lambda x: print(f"User liked: {x}"))

                with gr.Row(elem_id="buttons") as button_row:
                    upvote_btn = gr.Button(value="πŸ‘ Upvote", interactive=False)
                    downvote_btn = gr.Button(value="πŸ‘Ž Downvote", interactive=False)
                    clear_btn = gr.Button(value="πŸ—‘οΈ Clear history", interactive=False)

        # Link button click to function
        run_button.click(
            fn=solve_problem_gradio,
            inputs=[user_query, user_image, max_steps, max_time, api_key, llm_model_engine, enabled_tools],
            outputs=chatbot_output
        )
    #################### Gradio Interface ####################

    # Launch the Gradio app
    demo.launch()
342
+
343
+
344
if __name__ == "__main__":
    args = parse_arguments()

    # Manually set enabled tools
    # args.enabled_tools = "Generalist_Solution_Generator_Tool"

    # All tools
    # NOTE: `all_tools` is intentionally module-level — main() reads it as a
    # global to populate the "Enabled Tools" checkbox group.
    all_tools = [
        # Generalist reasoning (no external dependencies)
        "Generalist_Solution_Generator_Tool",

        # Vision tools
        "Image_Captioner_Tool",
        "Object_Detector_Tool",
        "Text_Detector_Tool",
        "Relevant_Patch_Zoomer_Tool",

        # Code execution
        "Python_Code_Generator_Tool",

        # Search / retrieval tools
        "ArXiv_Paper_Searcher_Tool",
        "Google_Search_Tool",
        "Nature_News_Fetcher_Tool",
        "Pubmed_Search_Tool",
        "URL_Text_Extractor_Tool",
        "Wikipedia_Knowledge_Searcher_Tool"
    ]
    # Override the CLI value so every tool above is enabled for the demo.
    args.enabled_tools = ",".join(all_tools)

    main(args)
371
+
octotools/tools/object_detector/tool.py CHANGED
@@ -59,6 +59,7 @@ class Object_Detector_Tool(BaseTool):
59
  def build_tool(self, model_size='tiny'):
60
  model_name = f"IDEA-Research/grounding-dino-{model_size}"
61
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
62
  try:
63
  pipe = pipeline(model=model_name, task="zero-shot-object-detection", device=device)
64
  return pipe
 
59
  def build_tool(self, model_size='tiny'):
60
  model_name = f"IDEA-Research/grounding-dino-{model_size}"
61
  device = "cuda" if torch.cuda.is_available() else "cpu"
62
+ print(f"Building the Object Detection tool with model: {model_name} on device: {device}")
63
  try:
64
  pipe = pipeline(model=model_name, task="zero-shot-object-detection", device=device)
65
  return pipe