Spaces: Running on A10G

polished inference visualization

Browse files
- app.py +56 -31
- octotools/models/executor.py +2 -1
app.py CHANGED
@@ -204,9 +204,9 @@ class Solver:
 
         # Step 1: Display the received inputs
         if user_image:
-            messages.append(ChatMessage(role="assistant", content=f"📝 Received Query
+            messages.append(ChatMessage(role="assistant", content=f"### 📝 Received Query:\n{user_query}\n### 🖼️ Image Uploaded"))
         else:
-            messages.append(ChatMessage(role="assistant", content=f"📝 Received Query
+            messages.append(ChatMessage(role="assistant", content=f"### 📝 Received Query:\n{user_query}"))
         yield messages
 
         # # Step 2: Add "thinking" status while processing
@@ -216,17 +216,25 @@ class Solver:
         #     metadata={"title": "⏳ Thinking: Processing input..."}
         # ))
 
-        # Step 3
+        # [Step 3] Initialize problem-solving state
         start_time = time.time()
         step_count = 0
         json_data = {"query": user_query, "image": "Image received as bytes"}
 
-
+        messages.append(ChatMessage(role="assistant", content="<br>"))
+        messages.append(ChatMessage(role="assistant", content="### 🐙 Reasoning Steps from OctoTools (Deep Thinking...)"))
+        yield messages
+
+        # [Step 4] Query Analysis
         query_analysis = self.planner.analyze_query(user_query, img_path)
         json_data["query_analysis"] = query_analysis
+        query_analysis = query_analysis.replace("Consice Summary:", "**Consice Summary:**\n")
+        query_analysis = query_analysis.replace("Required Skills:", "**Required Skills:**\n")
+        query_analysis = query_analysis.replace("Relevant Tools:", "**Relevant Tools:**\n")
+        query_analysis = query_analysis.replace("Additional Considerations:", "**Additional Considerations:**\n")
         messages.append(ChatMessage(role="assistant",
-            content=f"{query_analysis}",
-            metadata={"title": "🔍 Query Analysis"}))
+            content=f"{query_analysis}",
+            metadata={"title": "### 🔍 Step 0: Query Analysis"}))
         yield messages
 
         # Save the query analysis data
@@ -236,15 +244,17 @@ class Solver:
         }
         save_module_data(QUERY_ID, "step_0_query_analysis", query_analysis_data)
 
-
+
+
+        # Execution loop (similar to your step-by-step solver)
         while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
             step_count += 1
-
-
-
+            messages.append(ChatMessage(role="OctoTools",
+                content=f"Generating the {step_count}-th step...",
+                metadata={"title": f"🔄 Step {step_count}"}))
             yield messages
 
-            # Generate the next step
+            # [Step 5] Generate the next step
             next_step = self.planner.generate_next_step(
                 user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
             )
@@ -261,9 +271,8 @@ class Solver:
             # Display the step information
             messages.append(ChatMessage(
                 role="assistant",
-                content=f"
-                metadata={"title": f"
-            ))
+                content=f"**Context:** {context}\n\n**Sub-goal:** {sub_goal}\n\n**Tool:** `{tool_name}`",
+                metadata={"title": f"### 🎯 Step {step_count}: Action Prediction ({tool_name})"}))
             yield messages
 
             # Handle tool execution or errors
@@ -274,22 +283,38 @@ class Solver:
                 yield messages
                 continue
 
-            #
+            # [Step 6-7] Generate and execute the tool command
            tool_command = self.executor.generate_tool_command(
                user_query, img_path, context, sub_goal, tool_name, self.planner.toolbox_metadata[tool_name]
            )
-            explanation, command = self.executor.extract_explanation_and_command(tool_command)
+            analysis, explanation, command = self.executor.extract_explanation_and_command(tool_command)
             result = self.executor.execute_tool_command(tool_name, command)
             result = make_json_serializable(result)
 
+            # Display the command generation information
+            messages.append(ChatMessage(
+                role="assistant",
+                content=f"**Analysis:** {analysis}\n\n**Explanation:** {explanation}\n\n**Command:**\n```python\n{command}\n```",
+                metadata={"title": f"### 📝 Step {step_count}: Command Generation ({tool_name})"}))
+            yield messages
+
             # Save the command generation data
             command_generation_data = {
+                "analysis": analysis,
                 "explanation": explanation,
                 "command": command,
                 "time": round(time.time() - start_time, 5)
             }
             save_module_data(QUERY_ID, f"step_{step_count}_command_generation", command_generation_data)
 
+            # Display the command execution result
+            messages.append(ChatMessage(
+                role="assistant",
+                content=f"**Result:**\n```json\n{json.dumps(result, indent=4)}\n```",
+                # content=f"**Result:**\n```json\n{result}\n```",
+                metadata={"title": f"### 🛠️ Step {step_count}: Command Execution ({tool_name})"}))
+            yield messages
+
             # Save the command execution data
             command_execution_data = {
                 "result": result,
@@ -297,13 +322,7 @@ class Solver:
             }
             save_module_data(QUERY_ID, f"step_{step_count}_command_execution", command_execution_data)
 
-
-                role="assistant",
-                content=f"{json.dumps(result, indent=4)}",
-                metadata={"title": f"✅ Step {step_count} Result: {tool_name}"}))
-            yield messages
-
-            # Step 6: Memory update and stopping condition
+            # [Step 8] Memory update and stopping condition
             self.memory.add_action(step_count, tool_name, sub_goal, tool_command, result)
             stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
             conclusion = self.planner.extract_conclusion(stop_verification)
@@ -316,9 +335,12 @@ class Solver:
             }
             save_module_data(QUERY_ID, f"step_{step_count}_context_verification", context_verification_data)
 
+            # Display the context verification result
+            conclusion_emoji = "✅" if conclusion == 'STOP' else "🔄"
             messages.append(ChatMessage(
                 role="assistant",
-                content=f"
+                content=f"**Analysis:** {analysis}\n\n**Conclusion:** `{conclusion}` {conclusion_emoji}",
+                metadata={"title": f"### 🤔 Step {step_count}: Context Verification"}))
             yield messages
 
             if conclusion == 'STOP':
@@ -326,8 +348,9 @@ class Solver:
 
         # Step 7: Generate Final Output (if needed)
         if 'direct' in self.output_types:
+            messages.append(ChatMessage(role="assistant", content="<br>"))
             direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
-            messages.append(ChatMessage(role="assistant", content=f"
+            messages.append(ChatMessage(role="assistant", content=f"### 🐙 Final Answer:\n{direct_output}"))
             yield messages
 
         # Save the direct output data
@@ -351,7 +374,9 @@ class Solver:
         save_module_data(QUERY_ID, "final_output", final_output_data)
 
         # Step 8: Completion Message
-        messages.append(ChatMessage(role="assistant", content="
+        messages.append(ChatMessage(role="assistant", content="<br>"))
+        messages.append(ChatMessage(role="assistant", content="### ✅ Query Solved!"))
+        messages.append(ChatMessage(role="assistant", content="How do you like the output from OctoTools 🐙? Please give us your feedback below. \n\n👍 If the answer is correct or the reasoning steps are helpful, please upvote the output. \n👎 If it is incorrect or the reasoning steps are not helpful, please downvote the output. \n💬 If you have any suggestions or comments, please leave them below.\n\nThank you for using OctoTools! 🙏"))
         yield messages
 
 
@@ -501,7 +526,7 @@ def main(args):
             # container=False
         )
     else:
-        print(f"Using local API key from environment variable: {os.getenv('OPENAI_API_KEY')[:
+        print(f"Using local API key from environment variable: ...{os.getenv('OPENAI_API_KEY')[-4:]}")
        api_key = gr.Textbox(
            value=os.getenv("OPENAI_API_KEY"),
            visible=False,
@@ -516,10 +541,10 @@ def main(args):
                label="LLM Model"
            )
            with gr.Row():
-                max_steps = gr.Slider(value=
+                max_steps = gr.Slider(value=8, minimum=1, maximum=10, step=1, label="Max Steps")
 
            with gr.Row():
-                max_time = gr.Slider(value=
+                max_time = gr.Slider(value=240, minimum=60, maximum=300, step=30, label="Max Time (seconds)")
 
            with gr.Row():
                # Container for tools section
@@ -562,7 +587,7 @@ def main(args):
 
        # Right column for the output
        with gr.Column(scale=3):
-            chatbot_output = gr.Chatbot(type="messages", label="Step-wise Problem-Solving Output
+            chatbot_output = gr.Chatbot(type="messages", label="Step-wise Problem-Solving Output", height=500)
 
            # TODO: Add actions to the buttons
            with gr.Row(elem_id="buttons") as button_row:
@@ -659,7 +684,7 @@ def main(args):
                    "Need expert insights."],
 
            ],
-            inputs=[gr.Textbox(label="Category"), user_image, user_query, enabled_tools, gr.Textbox(label="Reference Answer")],
+            inputs=[gr.Textbox(label="Category", visible=False), user_image, user_query, enabled_tools, gr.Textbox(label="Reference Answer", visible=False)],
            # label="Try these examples with suggested tools."
        )
 
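For readers skimming the diff: the whole visualization rests on one Gradio mechanism. A `gr.Chatbot(type="messages")` renders any `ChatMessage` whose `metadata` dict carries a `"title"` as a collapsible step panel, and yielding the growing `messages` list after each append streams the panels in one at a time. Below is a minimal, self-contained sketch of that pattern, assuming Gradio 4.44+; `fake_solver` and its two-step loop are hypothetical stand-ins for the planner/executor pipeline, not OctoTools code.

```python
import time

import gradio as gr
from gradio import ChatMessage


def fake_solver(query):
    messages = []
    messages.append(ChatMessage(role="assistant", content=f"### 📝 Received Query:\n{query}"))
    yield messages  # stream the first bubble immediately
    for step in (1, 2):
        # The "title" metadata turns this message into a collapsible step panel.
        messages.append(ChatMessage(
            role="assistant",
            content=f"Working on step {step}...",
            metadata={"title": f"🔄 Step {step}"}))
        yield messages
        time.sleep(0.5)  # stand-in for planner/executor latency
    messages.append(ChatMessage(role="assistant", content="### ✅ Query Solved!"))
    yield messages


with gr.Blocks() as demo:
    box = gr.Textbox(label="Query")
    chat = gr.Chatbot(type="messages", label="Step-wise Problem-Solving Output", height=500)
    box.submit(fake_solver, inputs=box, outputs=chat)

demo.launch()
```

One caveat: `ChatMessage.role` is documented as `"user"` or `"assistant"`, so the `role="OctoTools"` in the new step-generation bubble above appears to rely on Gradio not validating that field strictly.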
octotools/models/executor.py CHANGED
@@ -151,9 +151,10 @@ Remember: Your <command> field MUST be valid Python code including any necessary
             # Remove leading and trailing whitespace and triple backticks
             return re.sub(r'^```python\s*', '', code).rstrip('```').strip()
 
+        analysis = response.analysis.strip()  # NOTE: added this line
         explanation = response.explanation.strip()
         command = normarlize_code(response.command.strip())
-        return explanation, command
+        return analysis, explanation, command
 
     def execute_tool_command(self, tool_name: str, command: str) -> Any:
         """
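The executor change threads a new `analysis` field through `extract_explanation_and_command`, so the function now returns a 3-tuple and every caller must unpack three values, which is exactly what the app.py hunk above does. A minimal sketch of the new contract, assuming `response` is a parsed structured LLM output with `analysis`, `explanation`, and `command` string fields (the `ToolCommandResponse` dataclass is a hypothetical stand-in; the real response type is not shown in this diff):

```python
from dataclasses import dataclass


@dataclass
class ToolCommandResponse:
    """Hypothetical stand-in for the parsed LLM response."""
    analysis: str     # why this tool/command fits the current sub-goal
    explanation: str  # human-readable description of the command
    command: str      # executable Python code, possibly fenced in triple backticks


def extract_explanation_and_command(response: ToolCommandResponse):
    # Mirrors the updated executor: strip each field and return all three.
    analysis = response.analysis.strip()
    explanation = response.explanation.strip()
    command = response.command.strip()  # the real code also strips code fences here
    return analysis, explanation, command


# Call-site shape after this commit (two-value unpacking would now raise ValueError):
analysis, explanation, command = extract_explanation_and_command(
    ToolCommandResponse(
        analysis="The query needs OCR on the uploaded image.",
        explanation="Run the OCR tool on the image path.",
        command="execution = tool.execute(image='example.png')",
    )
)
```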