lupantech commited on
Commit
ae05573
·
1 Parent(s): c25a9d7

opentools-->octotools; added remaining tools; polished the ui

Browse files
octotools/tools/README.md ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ## Testing the Tools
3
+
4
+ To test the text detection and object detection tools, follow these steps:
5
+
6
+ 1. **Navigate to the Project Directory:**
7
+
8
+ Change your current directory to where the tools are located. Replace `your_path` with the actual path to your project directory.
9
+
10
+ ```sh
11
+ cd your_path/toolbox-agent/octotools
12
+ ```
13
+
14
+ 2. **Set Up the Python Path:**
15
+
16
+ ```sh
17
+ cd your_path/toolbox-agent
18
+ export PYTHONPATH=$(pwd)
19
+ ```
20
+
21
+
22
+ Execute the tool using the following command:
23
+
24
+ ```sh
25
+ python tools/text_detector/tool.py
26
+
27
+ python tools/object_detector/tool.py
28
+
29
+ ```
30
+
31
+ ## File Structure
32
+
33
+ The project is organized as follows:
34
+
35
+ ```sh
36
+ ├── __init__.py # Initializes the tools package and possibly exposes submodules
37
+ ├── base.py # Base class for tools, providing common functionality
38
+ ├── text_detector/ # Directory for the text detection tool
39
+ │ ├── readme.md # Documentation for the text detection tool
40
+ │ └── tool.py # Implementation of the text detection tool
41
+ ├── object_detector/ # Directory for the object detection tool
42
+ │ ├── readme.md # Documentation for the object detection tool
43
+ │ └── tool.py # Implementation of the object detection tool
44
+ ```
octotools/tools/__init__.py ADDED
File without changes
octotools/tools/advanced_object_detector/tool.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Grounding DINO Object Detection Tool
# https://huggingface.co/IDEA-Research/grounding-dino

import base64
import os
import re
import sys
import time

import requests
from PIL import Image, ImageOps

from octotools.tools.base import BaseTool

# Suppress stderr by redirecting it to /dev/null.
# NOTE(review): this silences ALL stderr output process-wide (including
# tracebacks); the file handle is intentionally kept open for the lifetime
# of the process.
sys.stderr = open(os.devnull, 'w')
17
+
18
+
19
class Advanced_Object_Detector_Tool(BaseTool):
    """Detects objects in an image via the DeepDataSpace DINO-X HTTP API and
    saves each detected object as a white-padded crop.

    Requires the DINO_KEY environment variable for API authentication.
    """

    def __init__(self):
        super().__init__(
            tool_name="Advanced_Object_Detector_Tool",
            tool_description="A tool that detects objects in an image using the Grounding DINO-X model and saves individual object images with empty padding.",
            tool_version="1.0.0",
            input_types={
                "image": "str - The path to the image file.",
                "labels": "list - A list of object labels to detect.",
                "threshold": "float - The confidence threshold for detection (default: 0.35).",
                "padding": "int - The number of pixels to add as empty padding around detected objects (default: 20)."
            },
            output_type="list - A list of detected objects with their scores, bounding boxes, and saved image paths.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", labels=["baseball", "basket"])',
                    "description": "Detect baseball and basket in an image, save the detected objects with default empty padding, and return their paths."
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", labels=["car", "person"], threshold=0.5, model_size="base", padding=15)',
                    "description": "Detect car and person in an image using the base model, save the detected objects with 15 pixels of empty padding, and return their paths."
                }
            ],
            user_metadata={
                "limitation": "The model may not always detect objects accurately, and its performance can vary depending on the input image and the associated labels. It typically struggles with detecting small objects, objects that are uncommon, or objects with limited or specific attributes. For improved accuracy or better detection in certain situations, consider using supplementary tools or image processing techniques to provide additional information for verification."
            }
        )
        # API token for the DeepDataSpace DINO-X service; None if unset.
        self.DINO_KEY = os.environ.get("DINO_KEY")

    def preprocess_caption(self, caption):
        """Normalize a label: lower-case, strip, and ensure a trailing period."""
        result = caption.lower().strip()
        if result.endswith("."):
            return result
        return result + "."

    def build_tool(self, threshold=0.35):
        """Build the request template (headers + body) for the DINO-X API.

        Parameters:
            threshold (float): Confidence threshold forwarded as bbox_threshold.

        Returns:
            dict: {'headers': ..., 'body': ...} with image/prompt left as None,
            to be filled in by execute().
        """
        params_dict = {
            'headers': {
                "Content-Type": "application/json",
                "Token": self.DINO_KEY
            },
            'body': {
                "image": None,
                "prompts": [
                    {"type": "text", "text": None},
                ],
                "bbox_threshold": threshold
            }
        }
        return params_dict

    def save_detected_object(self, image, box, image_name, label, index, padding):
        """Crop `box` from `image`, add white padding, and save it to output_dir.

        Returns:
            str: Path of the saved crop.
        """
        object_image = image.crop(box)
        padded_image = ImageOps.expand(object_image, border=padding, fill='white')

        filename = f"{image_name}_{label}_{index}.png"
        os.makedirs(self.output_dir, exist_ok=True)
        save_path = os.path.join(self.output_dir, filename)

        padded_image.save(save_path)
        return save_path

    def execute(self, image, labels, threshold=0.35, padding=20, max_retries=10, retry_delay=5):
        """Detect the given labels in `image` and save padded crops.

        Parameters:
            image (str): Path or URL of the image to analyze.
            labels (list): Object labels to detect; an empty list triggers
                prompt-free detection.
            threshold (float): Confidence threshold for detection.
            padding (int): Pixels of white padding around each saved crop.
            max_retries (int): Maximum number of task-status polls.
            retry_delay (int): Seconds to wait between polls.

        Returns:
            list: Dicts with label, confidence score, box, and saved_image_path;
            empty list on failure.
        """
        retry_count = 0
        params = self.build_tool(threshold)

        def process_image(input_str):
            """Return a URL unchanged, or a local image file as a data URI."""
            def image_to_base64(image_path):
                with open(image_path, "rb") as image_file:
                    return base64.b64encode(image_file.read()).decode('utf-8')

            # Define common image file extensions
            image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.tiff', '.webp'}

            # URLs are passed through untouched (the API fetches them itself).
            url_pattern = re.compile(r'^(http|https|ftp)://')
            if url_pattern.match(input_str):
                return input_str

            # Local files are inlined as base64 data URIs.
            _, ext = os.path.splitext(input_str)
            if ext.lower() in image_extensions:
                image_base64 = image_to_base64(input_str)
                return f'data:image/png;base64,{image_base64}'
            return None

        if len(labels) < 1:
            preprocessed_prompt = '<prompt_free>'
        else:
            preprocessed_prompt = ''
            for label in labels:
                preprocessed_prompt += self.preprocess_caption(label)

        body = params['body']
        body['image'] = process_image(image)
        body['prompts'] = [{"type": "text", "text": preprocessed_prompt}]

        # send request
        resp = requests.post(
            'https://api.deepdataspace.com/tasks/dinox',
            json=body,
            headers=params['headers']
        )

        if resp.status_code == 200:
            json_resp = resp.json()
            print(json_resp)

            # get task_uuid
            task_uuid = json_resp["data"]["task_uuid"]
            print(f'task_uuid:{task_uuid}')

            # poll get task result
            while retry_count < max_retries:
                resp = requests.get(f'https://api.deepdataspace.com/task_statuses/{task_uuid}', headers=params['headers'])

                if resp.status_code != 200:
                    break
                json_resp = resp.json()

                if json_resp["data"]["status"] not in ["waiting", "running"]:
                    break
                # BUG FIX: honor the retry_delay parameter (was hard-coded to 1s).
                time.sleep(retry_delay)
                retry_count += 1

            # BUG FIX: use .get() — if the very first status GET fails, json_resp
            # is still the POST response, which has no "status" key.
            status = json_resp["data"].get("status")
            if status == "failed":
                print(f'failed resp: {json_resp}')
            elif status == "success":
                formatted_results = []
                original_image = Image.open(image)
                image_name = os.path.splitext(os.path.basename(image))[0]

                # Per-label counter used to number the saved crops.
                object_counts = {}

                for result in json_resp['data']['result']['objects']:
                    box = tuple(result["bbox"])
                    try:
                        box = [int(x) for x in box]
                    except (TypeError, ValueError):
                        # Skip malformed bounding boxes instead of crashing.
                        continue
                    label = result["category"]
                    score = round(result["score"], 2)
                    if label.endswith("."):
                        label = label[:-1]

                    object_counts[label] = object_counts.get(label, 0) + 1
                    index = object_counts[label]

                    save_path = self.save_detected_object(original_image, box, image_name, label, index, padding)

                    formatted_results.append({
                        "label": label,
                        "confidence score": score,
                        "box": box,
                        "saved_image_path": save_path
                    })

                return formatted_results
            else:
                print(f'get task resp: {resp.status_code} - {resp.text}')
        else:
            print(f'Error: {resp.status_code} - {resp.text}')

        print(f"Failed to detect objects after {max_retries} attempts.")
        return []

    def get_metadata(self):
        """Return the tool's metadata dictionary."""
        metadata = super().get_metadata()
        return metadata
196
+
197
if __name__ == "__main__":
    # Smoke test — run from the terminal:
    #   cd octotools/tools/advanced_object_detector
    #   python tool.py
    import json

    detector = Advanced_Object_Detector_Tool()
    detector.set_custom_output_dir("detected_objects")

    # Exercise the metadata accessor (output intentionally not printed).
    _ = detector.get_metadata()

    # Resolve the sample image relative to this script's location.
    here = os.path.dirname(os.path.abspath(__file__))
    image_path = os.path.join(here, "examples/baseball.png")

    try:
        execution = detector.execute(image=image_path, labels=["baseball", "basket"], padding=20)
        print(json.dumps(execution, indent=4))
        print("Detected Objects:")
        for obj in execution:
            print(f"Detected {obj['label']} with confidence {obj['confidence score']}")
            print(f"Bounding box: {obj['box']}")
            print(f"Saved image (with padding): {obj['saved_image_path']}")
            print()
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/arxiv_paper_searcher/tool.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
+ from octotools.tools.base import BaseTool
6
+
7
class ArXiv_Paper_Searcher_Tool(BaseTool):
    """Searches arXiv's public search page and scrapes paper metadata
    (title, authors, abstract, link) from the HTML results."""

    def __init__(self):
        super().__init__(
            tool_name="ArXiv_Paper_Searcher_Tool",
            tool_description="A tool that searches arXiv for papers based on a given query.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query for arXiv papers.",
                "size": "int - The number of results per page (25, 50, 100, or 200). If None, use 25.",
                "max_results": "int - The maximum number of papers to return (default: 25). Should be less than or equal to 100."
            },
            output_type="list - A list of dictionaries containing paper information.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="tool agents with large language models")',
                    "description": "Search for papers about tool agents with large language models."
                },
                {
                    "command": 'execution = tool.execute(query="quantum computing", size=100, max_results=50)',
                    "description": "Search for quantum computing papers, with 100 results per page, returning a maximum of 50 papers."
                },
                {
                    "command": 'execution = tool.execute(query="machine learning", max_results=75)',
                    "description": "Search for machine learning papers, returning a maximum of 75 papers."
                },
            ],
            user_metadata={
                "valid_sizes": [25, 50, 100, 200],
                "base_url": "https://arxiv.org/search/"
            }
        )

    def build_tool(self):
        """
        No specific build required for this tool.
        """
        pass

    def execute(self, query, size=None, max_results=25):
        """
        Executes the arXiv search tool to find papers based on the given query.

        Parameters:
            query (str): The search query for arXiv papers.
            size (int): The number of results per page.
            max_results (int): The maximum number of papers to return.

        Returns:
            list: A list of dictionaries containing paper information.
        """
        valid_sizes = self.user_metadata["valid_sizes"]
        base_url = self.user_metadata["base_url"]

        if size is None:
            size = 25
        elif size not in valid_sizes:
            # Snap unsupported page sizes to the closest one arXiv accepts.
            size = min(valid_sizes, key=lambda x: abs(x - size))

        results = []
        start = 0

        max_results = min(max_results, 100)  # NOTE: For traffic reasons, limit to 100 results

        while len(results) < max_results:
            params = {
                "searchtype": "all",
                "query": query,
                "abstracts": "show",
                "order": "",
                "size": str(size),
                "start": str(start)
            }

            try:
                # BUG FIX: a request without a timeout can hang indefinitely on
                # a stalled connection; cap it at 30 seconds.
                response = requests.get(base_url, params=params, timeout=30)
                soup = BeautifulSoup(response.content, 'html.parser')

                papers = soup.find_all("li", class_="arxiv-result")
                if not papers:
                    break

                for paper in papers:
                    if len(results) >= max_results:
                        break

                    title = paper.find("p", class_="title").text.strip()
                    authors = paper.find("p", class_="authors").text.strip()
                    # Strip the "Authors:" prefix and collapse whitespace.
                    authors = re.sub(r'^Authors:\s*', '', authors)
                    authors = re.sub(r'\s+', ' ', authors).strip()

                    abstract = paper.find("span", class_="abstract-full").text.strip()
                    # Remove the "△ Less" collapse-toggle text arXiv embeds.
                    abstract = abstract.replace("△ Less", "").strip()

                    link = paper.find("p", class_="list-title").find("a")["href"]

                    results.append({
                        "title": title,
                        "authors": authors,
                        "abstract": abstract,
                        "link": f"{link}"
                    })

                start += size

            except Exception as e:
                print(f"Error searching arXiv: {e}")
                break

        return results[:max_results]

    def get_metadata(self):
        """
        Returns the metadata for the ArXiv_Paper_Searcher_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata
126
+
127
if __name__ == "__main__":
    # Smoke test — run from the terminal:
    #   cd octotools/tools/arxiv_paper_searcher
    #   python tool.py
    import json

    print("ArXiv Search Tool Test")

    searcher = ArXiv_Paper_Searcher_Tool()

    print("Tool Metadata:")
    print(searcher.get_metadata())

    # Sample query exercising the scraper end-to-end.
    query = "enhance mathematical reasoning with large language models"
    try:
        execution = searcher.execute(query=query, size=50, max_results=10)
        print("\n==>> Execution:")
        print(json.dumps(execution, indent=4))  # Pretty print JSON
        print("\n==>> Search Results:")
        for i, paper in enumerate(execution, 1):
            print(f"{i}. {paper['title']}")
            print(f" Authors: {paper['authors']}")
            print(f" Abstract: {paper['abstract'][:2000]}")
            print(f" Link: {paper['link']}")
            print()
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/base.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # octotools/tools/base.py
2
+
3
+ from octotools.engine.openai import ChatOpenAI
4
+
5
class BaseTool:
    """
    Common base for all tool classes (image processing, text detection, ...).

    Subclasses provide descriptive metadata via the constructor or
    set_metadata(), and implement execute() with their actual behavior.
    """

    # Subclasses that depend on an LLM engine override this with True.
    require_llm_engine = False

    def __init__(self, tool_name=None, tool_description=None, tool_version=None, input_types=None, output_type=None, demo_commands=None, output_dir=None, user_metadata=None, model_string=None):
        """
        Create a tool, optionally pre-populating its metadata.

        Parameters:
            tool_name (str): Name of the tool.
            tool_description (str): Human-readable description.
            tool_version (str): Version string.
            input_types (dict): Expected input types.
            output_type (str): Expected output type.
            demo_commands (list): Example invocations.
            output_dir (str): Directory for any files the tool writes (optional).
            user_metadata (dict): Extra user-specific metadata (optional).
            model_string (str): LLM model identifier (only meaningful when
                require_llm_engine is True).
        """
        # Store every constructor argument verbatim as an instance attribute.
        for attr, value in (
            ("tool_name", tool_name),
            ("tool_description", tool_description),
            ("tool_version", tool_version),
            ("input_types", input_types),
            ("output_type", output_type),
            ("demo_commands", demo_commands),
            ("output_dir", output_dir),
            ("user_metadata", user_metadata),
            ("model_string", model_string),
        ):
            setattr(self, attr, value)

    def set_metadata(self, tool_name, tool_description, tool_version, input_types, output_type, demo_commands, user_metadata=None):
        """
        Replace the tool's descriptive metadata in one call.

        Parameters mirror __init__ (output_dir and model_string are untouched).
        """
        self.tool_name, self.tool_description = tool_name, tool_description
        self.tool_version = tool_version
        self.input_types, self.output_type = input_types, output_type
        self.demo_commands = demo_commands
        self.user_metadata = user_metadata

    def get_metadata(self):
        """
        Build and return the tool's metadata dictionary.

        Returns:
            dict: Metadata; includes "user_metadata" only when it is truthy.
        """
        metadata = dict(
            tool_name=self.tool_name,
            tool_description=self.tool_description,
            tool_version=self.tool_version,
            input_types=self.input_types,
            output_type=self.output_type,
            demo_commands=self.demo_commands,
            require_llm_engine=self.require_llm_engine,
        )
        if self.user_metadata:
            metadata["user_metadata"] = self.user_metadata
        return metadata

    def set_custom_output_dir(self, output_dir):
        """
        Point the tool at a different output directory.

        Parameters:
            output_dir (str): The new output directory path.
        """
        self.output_dir = output_dir

    def set_llm_engine(self, model_string):
        """
        Select the LLM engine model for this tool.

        Parameters:
            model_string (str): The model string for the LLM engine.
        """
        self.model_string = model_string

    def execute(self, *args, **kwargs):
        """
        Run the tool. Must be overridden by every concrete subclass.

        Raises:
            NotImplementedError: Always, unless a subclass overrides it.
        """
        raise NotImplementedError("Subclasses must implement the execute method.")
octotools/tools/generalist_solution_generator/tool.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from octotools.tools.base import BaseTool
3
+ from octotools.engine.openai import ChatOpenAI
4
+
5
class Generalist_Solution_Generator_Tool(BaseTool):
    """General-purpose LLM-backed tool: answers a free-form prompt step by
    step, optionally conditioned on a single image."""

    require_llm_engine = True
    require_api_key = True

    def __init__(self, model_string="gpt-4o-mini", api_key=None):
        """
        Parameters:
            model_string (str): OpenAI model used to generate responses.
            api_key (str): API key forwarded to the ChatOpenAI engine (optional).
        """
        super().__init__(
            tool_name="Generalist_Solution_Generator_Tool",
            tool_description="A generalized tool that takes query from the user as prompt, and answers the question step by step to the best of its ability. It can also accept an image.",
            tool_version="1.0.0",
            input_types={
                "prompt": "str - The prompt that includes query from the user to guide the agent to generate response (Examples: 'Describe this image in detail').",
                "image": "str - The path to the image file if applicable (default: None).",
            },
            output_type="str - The generated response to the original query prompt",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(prompt="Summarize the following text in a few lines")',
                    "description": "Generate a short summary given the prompt from the user."
                },
                {
                    "command": 'execution = tool.execute(prompt="Explain the mood of this scene.", image="path/to/image1.png")',
                    "description": "Generate a caption focusing on the mood using a specific prompt and image."
                },
                {
                    "command": 'execution = tool.execute(prompt="Give your best coordinate estimate for the pacemaker in the image and return (x1, y1, x2, y2)", image="path/to/image2.png")',
                    "description": "Generate bounding box coordinates given the image and prompt from the user. The format should be (x1, y1, x2, y2)."
                },
                {
                    "command": 'execution = tool.execute(prompt="Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?", image="path/to/image2.png")',
                    "description": "Answer a question step by step given the image."
                }
            ],
            # NOTE(review): several alternative phrasings of this guidance were
            # benchmarked during development (v0, v2, v6, v8); the discarded
            # variants live in version control history.
            user_metadata = {
                "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
                "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
                "1) Provide clear, specific prompts.\n"
                "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
                "3) For complex queries, break them down into subtasks and use the tool multiple times.\n"
                "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
                "5) Verify important information from its responses.\n"
                "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            }
        )
        self.model_string = model_string
        self.api_key = api_key

    def execute(self, prompt, image=None):
        """
        Generate a response for `prompt`, optionally grounded in `image`.

        Parameters:
            prompt (str): The user's query.
            image (str): Optional path to an image file.

        Returns:
            str: The model's response, or an error message string on failure.
        """
        print(f"\nInitializing Generalist Tool with model: {self.model_string}")
        multimodal = bool(image)  # idiomatic form of `True if image else False`
        llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal, api_key=self.api_key)

        try:
            input_data = [prompt]
            if multimodal:
                if not os.path.isfile(image):
                    return "Error: Invalid image file path."
                try:
                    with open(image, 'rb') as file:
                        image_bytes = file.read()
                    input_data.append(image_bytes)
                except Exception as e:
                    return f"Error reading image file: {str(e)}"

                response = llm_engine(input_data)
            else:
                # Text-only: pass the bare prompt string, not a list.
                response = llm_engine(input_data[0])
            return response
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def get_metadata(self):
        """Return the tool's metadata dictionary."""
        metadata = super().get_metadata()
        return metadata
106
+
107
if __name__ == "__main__":
    # Smoke test — run from the terminal:
    #   cd octotools
    #   python tools/default/tool.py

    script_dir = os.path.dirname(os.path.abspath(__file__))
    print(f"Script directory: {script_dir}")

    # Default model; pass model_string="gpt-4o" etc. to try others.
    tool = Generalist_Solution_Generator_Tool()

    print(tool.get_metadata())

    # Sample image shipped alongside this script.
    image_path = os.path.join(script_dir, "examples/mathvista_113.png")
    prompt = "Describe the image in detail."

    try:
        execution = tool.execute(prompt=prompt, image=image_path)
        print("Generated Response:")
        print(execution)
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/google_search/tool.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import Any, Dict, List

import requests
from dotenv import load_dotenv

from octotools.tools.base import BaseTool

# Load GOOGLE_API_KEY / GOOGLE_CX from a local .env file, if one exists.
load_dotenv()
9
+
10
class Google_Search_Tool(BaseTool):
    """Performs web searches through the Google Custom Search JSON API."""

    def __init__(self):
        super().__init__(
            tool_name="Google_Search_Tool",
            tool_description="A tool that performs Google searches based on a given text query.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query to be used for the Google search.",
                "num_results": "int - The number of search results to return (default: 10).",
            },
            output_type="list - A list of dictionaries containing search result information.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="Python programming")',
                    "description": "Perform a Google search for 'Python programming' and return the default number of results."
                },
                {
                    "command": 'execution = tool.execute(query="Machine learning tutorials", num_results=5)',
                    "description": "Perform a Google search for 'Machine learning tutorials' and return 5 results."
                },
            ],
        )
        self.api_key = os.getenv("GOOGLE_API_KEY")  # NOTE: Replace with your own API key (Ref: https://developers.google.com/custom-search/v1/introduction)
        self.cx = os.getenv("GOOGLE_CX")  # NOTE: Replace with your own custom search (Ref: https://programmablesearchengine.google.com/controlpanel/all)
        self.base_url = "https://www.googleapis.com/customsearch/v1"

    def google_search(self, query: str, num_results: int = 10) -> Dict[str, Any]:
        """
        Performs a Google search using the provided query.

        Parameters:
            query (str): The search query.
            num_results (int): The number of search results to return.

        Returns:
            Dict[str, Any]: The raw search results from the Google API.
        """
        params = {
            'q': query,
            'key': self.api_key,
            'cx': self.cx,
            'num': num_results
        }

        # BUG FIX: a request without a timeout can hang indefinitely.
        response = requests.get(self.base_url, params=params, timeout=10)
        return response.json()

    def execute(self, query: str, num_results: int = 10) -> List[Dict[str, Any]]:
        """
        Executes a Google search based on the provided query.

        Parameters:
            query (str): The search query.
            num_results (int): The number of search results to return (default: 10).

        Returns:
            List[Dict[str, Any]]: A list of dictionaries containing search result
            information, or a single-element list with an "error" key on failure.
        """
        if not self.api_key:
            return [{"error": "Google API key is not set. Please set the GOOGLE_API_KEY environment variable."}]

        try:
            results = self.google_search(query, num_results)
            print(results)

            if 'items' in results:
                return [
                    {
                        "title": item['title'],
                        "link": item['link'],
                        # Some results may omit 'snippet'; default to an empty
                        # string instead of raising KeyError.
                        "snippet": item.get('snippet', '')
                    }
                    for item in results['items']
                ]
            else:
                return [{"error": "No results found."}]
        except Exception as e:
            return [{"error": f"An error occurred: {str(e)}"}]

    def get_metadata(self):
        """
        Returns the metadata for the Google_Search_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata
99
+
100
+
101
if __name__ == "__main__":
    # Smoke test — run from the terminal:
    #   export GOOGLE_API_KEY=your_api_key_here
    #   cd octotools/tools/google_search
    #   python tool.py

    tool = Google_Search_Tool()

    # Show the tool's metadata.
    print(tool.get_metadata())

    # Run a sample search and dump the results.
    query = "nobel prize winners in chemistry 2024"
    try:
        execution = tool.execute(query=query, num_results=5)
        print("\nExecution Result:")
        print(f"Search query: {query}")
        print(f"Number of results: {len(execution)}")
        print("\nSearch Results:")
        if "error" in execution[0]:
            print(f"Error: {execution[0]['error']}")
        else:
            for i, item in enumerate(execution, 1):
                print(f"\n{i}. Title: {item['title']}")
                print(f" URL: {item['link']}")
                print(f" Snippet: {item['snippet']}")
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/image_captioner/tool.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from octotools.tools.base import BaseTool
3
+ from octotools.engine.openai import ChatOpenAI
4
+
5
class Image_Captioner_Tool(BaseTool):
    """Generates a natural-language caption for an image via an OpenAI multimodal model."""

    require_llm_engine = True

    def __init__(self, model_string="gpt-4o-mini"):
        super().__init__(
            tool_name="Image_Captioner_Tool",
            tool_description="A tool that generates captions for images using OpenAI's multimodal model.",
            tool_version="1.0.0",
            input_types={
                "image": "str - The path to the image file.",
                "prompt": "str - The prompt to guide the image captioning (default: 'Describe this image in detail.').",
            },
            output_type="str - The generated caption for the image.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.png")',
                    "description": "Generate a caption for an image using the default prompt and model."
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", prompt="Explain the mood of this scene.")',
                    "description": "Generate a caption focusing on the mood using a specific prompt and model."
                }
            ],
            user_metadata={
                "limitation": "The Image_Captioner_Tool provides general image descriptions but has limitations: 1) May make mistakes in complex scenes, counting, attribute detection, and understanding object relationships. 2) Might not generate comprehensive captions, especially for images with multiple objects or abstract concepts. 3) Performance varies with image complexity. 4) Struggles with culturally specific or domain-specific content. 5) May overlook details or misinterpret object relationships. For precise descriptions, consider: using it with other tools for context/verification, as an initial step before refinement, or in multi-step processes for ambiguity resolution. Verify critical information with specialized tools or human expertise when necessary."
            },
        )
        print(f"\nInitializing Image Captioner Tool with model: {model_string}")
        # A falsy model_string leaves the tool without an engine; execute() reports this.
        self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=True) if model_string else None

    def execute(self, image, prompt="Describe this image in detail."):
        """Return a caption for `image` guided by `prompt`; returns an error string on failure."""
        try:
            if not self.llm_engine:
                return "Error: LLM engine not initialized. Please provide a valid model_string."

            # Guard clause: reject missing or non-file paths before touching the filesystem.
            if not (image and os.path.isfile(image)):
                return "Error: Invalid image file path."

            payload = [prompt]
            try:
                with open(image, 'rb') as fh:
                    payload.append(fh.read())
            except Exception as e:
                return f"Error reading image file: {str(e)}"

            return self.llm_engine(payload)
        except Exception as e:
            return f"Error generating caption: {str(e)}"

    def get_metadata(self):
        """Return BaseTool metadata plus the engine-requirement flag."""
        metadata = super().get_metadata()
        metadata['require_llm_engine'] = self.require_llm_engine  # NOTE: can be removed if not needed
        return metadata
61
+
62
if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/image_captioner
    python tool.py
    """

    import json

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Image_Captioner_Tool
    # tool = Image_Captioner_Tool()
    tool = Image_Captioner_Tool(model_string="gpt-4o")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory
    relative_image_path = "examples/baseball.png"
    image_path = os.path.join(script_dir, relative_image_path)

    # Execute the tool with default prompt
    try:
        execution = tool.execute(image=image_path)
        print("Generated Caption:")
        # execute() returns a plain string, so json.dumps only adds quoting/escaping.
        print(json.dumps(execution, indent=4))
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/nature_news_fetcher/tool.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import time
5
+
6
+ from octotools.tools.base import BaseTool
7
+
8
class Nature_News_Fetcher_Tool(BaseTool):
    """Fetches and parses the latest news articles listed on nature.com."""

    def __init__(self):
        super().__init__(
            tool_name="Nature_News_Fetcher_Tool",
            tool_description="A tool that fetches the latest news articles from Nature.",
            tool_version="1.0.0",
            input_types={
                "num_articles": "int - The number of articles to fetch (default: 100).",
                "max_pages": "int - The maximum number of pages to fetch (default: 5).",
            },
            output_type="list - A list of dictionaries containing information about the latest Nature news articles.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute()',
                    "description": "Fetch the latest 100 news articles from Nature."
                },
                {
                    "command": 'execution = tool.execute(num_articles=50, max_pages=3)',
                    "description": "Fetch the latest 50 news articles from Nature, searching up to 3 pages."
                },
            ],
        )
        self.base_url = "https://www.nature.com/nature/articles"

    def fetch_page(self, page_number, timeout=30):
        """
        Fetches a single page of news articles from Nature's website.

        Parameters:
            page_number (int): The page number to fetch.
            timeout (float): Seconds to wait for the server before aborting.
                New optional parameter (default 30) so a stalled connection
                cannot block the caller indefinitely; existing callers are
                unaffected.

        Returns:
            str: The HTML content of the page.

        Raises:
            requests.HTTPError: If the server returns a 4xx/5xx status.
            requests.Timeout: If the server does not respond within `timeout`.
        """
        params = {
            "searchType": "journalSearch",
            "sort": "PubDate",
            "type": "news",
            "page": str(page_number)
        }
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # FIX: the original request had no timeout, so an unresponsive server
        # would hang this tool forever. requests does not time out by default.
        response = requests.get(self.base_url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.text

    def parse_articles(self, html_content):
        """
        Parses the HTML content and extracts article information.

        Parameters:
            html_content (str): The HTML content of the page.

        Returns:
            list: A list of dictionaries containing article information.
                Empty when the expected article-list section is absent.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        articles_section = soup.find('section', id='new-article-list')
        if not articles_section:
            return []

        articles = []
        for article in articles_section.find_all('article', class_='c-card'):
            # Each field falls back to a sentinel string rather than raising,
            # so one malformed card cannot abort the whole page parse.
            title_elem = article.find('h3', class_='c-card__title')
            title = title_elem.text.strip() if title_elem else "No title found"

            url_elem = title_elem.find('a') if title_elem else None
            url = "https://www.nature.com" + url_elem['href'] if url_elem and 'href' in url_elem.attrs else "No URL found"

            description_elem = article.find('div', {'data-test': 'article-description'})
            description = description_elem.text.strip() if description_elem else "No description available"

            authors_elem = article.find('ul', {'data-test': 'author-list'})
            authors = [author.text.strip() for author in authors_elem.find_all('li')] if authors_elem else ["No authors found"]

            date_elem = article.find('time')
            date = date_elem['datetime'] if date_elem and 'datetime' in date_elem.attrs else "No date found"

            image_elem = article.find('img')
            image_url = image_elem['src'] if image_elem and 'src' in image_elem.attrs else "No image found"

            articles.append({
                'title': title,
                'url': url,
                'description': description,
                'authors': authors,
                'date': date,
                'image_url': image_url
            })

        return articles

    def execute(self, num_articles=100, max_pages=5):
        """
        Fetches the latest news articles from Nature's website.

        Parameters:
            num_articles (int): The number of articles to fetch.
            max_pages (int): The maximum number of pages to fetch.

        Returns:
            list: A list of dictionaries containing article information,
                or a single-element list [{"error": ...}] on failure.
        """
        all_articles = []
        page_number = 1

        try:
            while len(all_articles) < num_articles and page_number <= max_pages:
                html_content = self.fetch_page(page_number)
                page_articles = self.parse_articles(html_content)

                if not page_articles:
                    break  # No more articles found

                all_articles.extend(page_articles)
                page_number += 1
                time.sleep(1)  # Be polite to the server

            return all_articles[:num_articles]
        except Exception as e:
            return [{"error": str(e)}]

    def get_metadata(self):
        """
        Returns the metadata for the Nature_News_Fetcher_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata
140
+
141
+
142
if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/nature_news_fetcher
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Nature_News_Fetcher_Tool
    tool = Nature_News_Fetcher_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    import json


    # Execute the tool to fetch the latest 10 articles (for demonstration purposes)
    try:
        execution = tool.execute(num_articles=10, max_pages=1)
        print(json.dumps(execution, indent=4))
        print("\nExecution Result:")
        print(f"Number of articles fetched: {len(execution)}")
        print("\nSample articles:")
        # NOTE: on failure execute() returns [{"error": ...}], in which case the
        # field accesses below would raise KeyError and land in the except branch.
        for i, article in enumerate(execution[:10], 1):
            print(f"\n{i}. Title: {article['title']}")
            print(f"   URL: {article['url']}")
            print(f"   Description: {article['description'][:100]}...")  # Show first 100 characters
            print(f"   Authors: {', '.join(article['authors'])}")
            print(f"   Date: {article['date']}")
            print(f"   Image URL: {article['image_url']}")
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/object_detector/tool.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Grounding DINO Object Detection Tool
2
+ # https://huggingface.co/IDEA-Research/grounding-dino
3
+
4
+ import os
5
+ import time
6
+ import torch
7
+ from transformers import pipeline
8
+
9
+ from octotools.tools.base import BaseTool
10
+ from PIL import Image, ImageOps
11
+
12
import os  # NOTE(review): redundant — os is already imported at the top of this file.
# If CUDA_HOME is set, print the value
print(os.environ.get('CUDA_HOME', 'CUDA_HOME is not set'))

# Suppress stderr by redirecting it to /dev/null
# NOTE(review): this silences stderr for the ENTIRE process (including
# tracebacks from unrelated code), not just this module's imports. Consider
# a scoped contextlib.redirect_stderr(...) instead — verify with the team.
import sys
sys.stderr = open(os.devnull, 'w')

import warnings
warnings.filterwarnings("ignore")
22
+
23
+
24
class Object_Detector_Tool(BaseTool):
    """Zero-shot object detection via Grounding DINO; crops and saves each hit with padding."""

    def __init__(self):
        super().__init__(
            tool_name="Object_Detector_Tool",
            tool_description="A tool that detects objects in an image using the Grounding DINO model and saves individual object images with empty padding.",
            tool_version="1.0.0",
            input_types={
                "image": "str - The path to the image file.",
                "labels": "list - A list of object labels to detect.",
                "threshold": "float - The confidence threshold for detection (default: 0.35).",
                "model_size": "str - The size of the model to use ('tiny' or 'base', default: 'tiny').",
                "padding": "int - The number of pixels to add as empty padding around detected objects (default: 20)."
            },
            output_type="list - A list of detected objects with their scores, bounding boxes, and saved image paths.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", labels=["baseball", "basket"])',
                    "description": "Detect baseball and basket in an image, save the detected objects with default empty padding, and return their paths."
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", labels=["car", "person"], threshold=0.5, model_size="base", padding=15)',
                    "description": "Detect car and person in an image using the base model, save the detected objects with 15 pixels of empty padding, and return their paths."
                }
            ],
            user_metadata={
                "limitation": "The model may not always detect objects accurately, and its performance can vary depending on the input image and the associated labels. It typically struggles with detecting small objects, objects that are uncommon, or objects with limited or specific attributes. For improved accuracy or better detection in certain situations, consider using supplementary tools or image processing techniques to provide additional information for verification."
            }
        )

    def preprocess_caption(self, caption):
        """Lowercase, strip, and terminate the label with a period (Grounding DINO prompt format)."""
        result = caption.lower().strip()
        if result.endswith("."):
            return result
        return result + "."

    def build_tool(self, model_size='tiny'):
        """
        Build a zero-shot-object-detection pipeline for the requested model size.

        Parameters:
            model_size (str): Suffix of the IDEA-Research checkpoint ('tiny' or 'base').

        Returns:
            transformers.Pipeline or None: The pipeline, or None if construction failed.
        """
        model_name = f"IDEA-Research/grounding-dino-{model_size}"
        # Prefer GPU when available; falls back to CPU.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        try:
            pipe = pipeline(model=model_name, task="zero-shot-object-detection", device=device)
            return pipe
        except Exception as e:
            print(f"Error building the Object Detection tool: {e}")
            return None

    def save_detected_object(self, image, box, image_name, label, index, padding):
        """
        Crop `box` out of `image`, pad it with white, and save it as PNG.

        Parameters:
            image (PIL.Image.Image): The full source image.
            box (tuple): Crop rectangle passed to Image.crop.
            image_name (str): Basename (no extension) of the source image.
            label (str): Detected label, used in the filename.
            index (int): Per-label counter to keep filenames unique.
            padding (int): Width of the white border added around the crop.

        Returns:
            str: Path of the saved crop.
        """
        object_image = image.crop(box)
        padded_image = ImageOps.expand(object_image, border=padding, fill='white')

        filename = f"{image_name}_{label}_{index}.png"
        # NOTE(review): self.output_dir is not set in this class — it appears to
        # come from BaseTool / set_custom_output_dir (see the __main__ demo).
        # Calling execute() without configuring it will fail here; confirm the
        # base class provides a default.
        os.makedirs(self.output_dir, exist_ok=True)
        save_path = os.path.join(self.output_dir, filename)

        padded_image.save(save_path)
        return save_path

    def execute(self, image, labels, threshold=0.35, model_size='tiny', padding=20, max_retries=10, retry_delay=5, clear_cuda_cache=False):
        """
        Detect `labels` in `image`, saving a padded crop per detection.

        Retries up to `max_retries` times on CUDA OOM (optionally clearing the
        CUDA cache); any other error aborts immediately. Returns [] on failure.

        Returns:
            list[dict]: One dict per detection with keys
                'label', 'confidence score', 'box', 'saved_image_path'.
        """
        for attempt in range(max_retries):
            try:
                saved_files = []

                # NOTE(review): the pipeline (model download + load) is rebuilt on
                # every call and every retry attempt — consider caching it.
                pipe = self.build_tool(model_size)
                if pipe is None:
                    raise ValueError("Failed to build the Object Detection tool.")

                preprocessed_labels = [self.preprocess_caption(label) for label in labels]
                results = pipe(image, candidate_labels=preprocessed_labels, threshold=threshold)

                formatted_results = []
                original_image = Image.open(image)
                image_name = os.path.splitext(os.path.basename(image))[0]

                # Per-label counters so repeated detections get distinct filenames.
                object_counts = {}

                for result in results:
                    box = tuple(result["box"].values())
                    label = result["label"]
                    score = round(result["score"], 2)
                    # Strip the trailing period added by preprocess_caption.
                    if label.endswith("."):
                        label = label[:-1]

                    object_counts[label] = object_counts.get(label, 0) + 1
                    index = object_counts[label]

                    save_path = self.save_detected_object(original_image, box, image_name, label, index, padding)

                    formatted_results.append({
                        "label": label,
                        "confidence score": score,
                        "box": box,
                        "saved_image_path": save_path
                    })

                return formatted_results

            except RuntimeError as e:
                if "CUDA out of memory" in str(e):
                    print(f"CUDA out of memory error on attempt {attempt + 1}.")
                    if clear_cuda_cache:
                        print("Clearing CUDA cache and retrying...")
                        torch.cuda.empty_cache()
                    else:
                        print(f"Retrying in {retry_delay} seconds...")
                        time.sleep(retry_delay)
                    continue
                else:
                    print(f"Runtime error: {e}")
                    break
            except Exception as e:
                print(f"Error detecting objects: {e}")
                break

        # NOTE(review): this message is printed even when the loop broke on the
        # first non-retryable error, so "after N attempts" can be misleading.
        print(f"Failed to detect objects after {max_retries} attempts.")
        return []

    def get_metadata(self):
        """Return BaseTool metadata (no tool-specific additions)."""
        metadata = super().get_metadata()
        return metadata
142
+
143
if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/object_detector
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Object_Detector_Tool
    tool = Object_Detector_Tool()
    # Crops are written under this directory; execute() relies on it being set.
    tool.set_custom_output_dir("detected_objects")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory
    relative_image_path = "examples/baseball.png"
    image_path = os.path.join(script_dir, relative_image_path)

    # Execute the tool
    try:
        execution = tool.execute(image=image_path, labels=["baseball", "basket"], padding=20)
        print("Detected Objects:")
        for obj in execution:
            print(f"Detected {obj['label']} with confidence {obj['confidence score']}")
            print(f"Bounding box: {obj['box']}")
            print(f"Saved image (with padding): {obj['saved_image_path']}")
            print()
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/pubmed_search/tool.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from pymed import PubMed
4
+ from metapub import PubMedFetcher
5
+ from octotools.tools.base import BaseTool
6
+ from tenacity import (
7
+ retry,
8
+ stop_after_attempt,
9
+ wait_random_exponential,
10
+ )
11
+
12
# Suppress stderr by redirecting it to /dev/null
# NOTE(review): this mutes stderr for the WHOLE process, not just this module;
# errors from other code will silently disappear. Consider a scoped
# contextlib.redirect_stderr(...) instead — confirm with the team.
import sys
sys.stderr = open(os.devnull, 'w')

import warnings
warnings.filterwarnings("ignore")
18
+
19
+
20
class Pubmed_Search_Tool(BaseTool):
    """Searches PubMed and returns title/abstract/keywords/URL for matching articles."""

    def __init__(self):
        super().__init__(
            tool_name="Pubmed_Search_Tool",
            tool_description="A tool that searches PubMed Central to retrieve relevant article abstracts based on a given list of text queries. Use this ONLY if you cannot use the other more specific ontology tools.",
            tool_version="1.0.0",
            input_types={
                "queries": "list[str] - list of queries terms for searching PubMed."
            },
            output_type="list - List of items matching the search query. Each item consists of the title, abstract, keywords, and URL of the article. If no results found, a string message is returned.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(queries=["scoliosis", "injury"])',
                    "description": "Search for PubMed articles mentioning 'scoliosis' OR 'injury'."
                },
                {
                    "command": 'execution = tool.execute(queries=["COVID", "vaccine", "occupational health"])',
                    "description": "Search for PubMed articles mentioning 'COVID' OR 'vaccine' OR 'occupational health'."
                }
            ],
            user_metadata={
                'limitations': "Try to use shorter and more general search queries."
            }
        )
        self.pubmed = PubMed(tool="MyTool", email="[email protected]")
        self.fetch = PubMedFetcher()

    @retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(3))
    def search_query(self, query_str, max_results=10):
        """Run a PubMed query with exponential-backoff retries (up to 3 attempts)."""
        return self.pubmed.query(query_str, max_results=max_results)

    def execute(self, queries, max_results=10):
        """
        Searches PubMed for articles matching any of `queries` (OR'd on Title/Abstract).

        Parameters:
            queries (list[str]): Search terms.
            max_results (int): Maximum number of articles to return (capped at 50).

        Returns:
            list[dict] | str | list: Article dicts on success; a message string
            when nothing matched; an empty list when the search itself failed.
        """
        try:
            # Builds e.g. "(a[Title/Abstract] OR b[Title/Abstract]) AND hasabstract[All Fields] AND fha[Filter]"
            query_str = f"({'[Title/Abstract] OR '.join(queries) + '[Title/Abstract]'}) AND hasabstract[All Fields] AND fha[Filter]"
            max_results = min(max_results, 50)

            results = self.search_query(query_str, max_results=max_results)  # API can only get most recent

            items = []
            for article in results:
                try:
                    article = json.loads(article.toJSON())
                    pubmed_id = article['pubmed_id']  # get id using pymed then get content using metapub

                    article = self.fetch.article_by_pmid(pubmed_id)
                    items.append({
                        'title': article.title,
                        'abstract': article.abstract,
                        'keywords': article.keywords,
                        'url': article.url
                    })
                except Exception:
                    # FIX: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit. Skipping one bad article
                    # remains deliberately best-effort.
                    continue

            if len(items) == 0:
                return "No results found for search query. Try another query or tool."

            return items

        except Exception as e:
            print(f"Error searching PubMed: {e}")
            return []

    def get_metadata(self):
        """Return BaseTool metadata (no tool-specific additions)."""
        metadata = super().get_metadata()
        return metadata
86
+
87
if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/pubmed_search
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage
    tool = Pubmed_Search_Tool()

    # Queries
    queries = ["COVID occupational health"]

    # Execute the tool
    try:
        execution = tool.execute(queries=queries)
        # execute() may return a list of article dicts, a not-found message
        # string, or [] on error — all are printable as-is.
        print(execution)
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/python_code_generator/tool.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # octotools/tools/python_code_generator/tool.py
2
+
3
+ import os
4
+ import re
5
+ import sys
6
+ from io import StringIO
7
+ import contextlib
8
+
9
+
10
+ from octotools.tools.base import BaseTool
11
+ from octotools.engine.openai import ChatOpenAI
12
+
13
+ import signal
14
+ from contextlib import contextmanager
15
+
16
+ # Custom exception for code execution timeout
17
class TimeoutException(Exception):
    """Raised when sandboxed snippet execution exceeds its time budget."""
    pass
19
+
20
+ # Custom context manager for code execution timeout
21
@contextmanager
def timeout(seconds):
    """Raise TimeoutException if the with-block runs longer than `seconds`.

    Implemented with SIGALRM, so it works only on the main thread of
    Unix-like systems; the previous alarm handler is always restored.
    """
    def _on_alarm(signum, frame):
        raise TimeoutException("Code execution timed out")

    # Install our handler, remembering whatever was there before.
    previous_handler = signal.signal(signal.SIGALRM, _on_alarm)
    signal.alarm(seconds)

    try:
        yield
    finally:
        # Cancel any pending alarm, then put the old handler back.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, previous_handler)
36
+
37
+
38
class Python_Code_Generator_Tool(BaseTool):
    """Generates a short Python snippet with an LLM and executes it in-process.

    SECURITY NOTE: the snippet is run with `exec`; the keyword blacklist below
    is NOT a sandbox. Only use this tool with trusted model output.
    """

    require_llm_engine = True

    def __init__(self, model_string="gpt-4o-mini"):
        super().__init__(
            tool_name="Python_Code_Generator_Tool",
            tool_description="A tool that generates and executes simple Python code snippets for basic arithmetical calculations and math-related problems. The generated code runs in a highly restricted environment with only basic mathematical operations available.",
            tool_version="1.0.0",
            input_types={
                "query": "str - A clear, specific description of the arithmetic calculation or math problem to be solved, including any necessary numerical inputs."},
            output_type="dict - A dictionary containing the generated code, calculation result, and any error messages.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="Calculate the factorial of 5")',
                    "description": "Generate a Python code snippet to calculate the factorial of 5."
                },
                {
                    "command": 'execution = tool.execute(query="Find the sum of prime numbers up to 50")',
                    "description": "Generate a Python code snippet to find the sum of prime numbers up to 50."
                },
                {
                    "command": 'query="Given the list [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], calculate the sum of squares of odd numbers"\nexecution = tool.execute(query=query)',
                    "description": "Generate a Python function for a specific mathematical operation on a given list of numbers."
                },
            ],
            user_metadata = {
                "limitations": [
                    "Restricted to basic Python arithmetic operations and built-in mathematical functions.",
                    "Cannot use any external libraries or modules, including those in the Python standard library.",
                    "Limited to simple mathematical calculations and problems.",
                    "Cannot perform any string processing, data structure manipulation, or complex algorithms.",
                    "No access to any system resources, file operations, or network requests.",
                    "Cannot use 'import' statements.",
                    "All calculations must be self-contained within a single function or script.",
                    "Input must be provided directly in the query string.",
                    "Output is limited to numerical results or simple lists/tuples of numbers."
                ],
                "best_practices": [
                    "Provide clear and specific queries that describe the desired mathematical calculation.",
                    "Include all necessary numerical inputs directly in the query string.",
                    "Keep tasks focused on basic arithmetic, algebraic calculations, or simple mathematical algorithms.",
                    "Ensure all required numerical data is included in the query.",
                    "Verify that the query only involves mathematical operations and does not require any data processing or complex algorithms.",
                    "Review generated code to ensure it only uses basic Python arithmetic operations and built-in math functions."
                ]
            }
        )
        print(f"\nInitializing Python_Code_Generator_Tool with model_string: {model_string}")
        self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=False) if model_string else None

    @staticmethod
    def preprocess_code(code):
        """
        Preprocesses the generated code snippet by extracting it from the response.

        Parameters:
            code (str): The response containing the code snippet.

        Returns:
            str: The extracted code snippet.

        Raises:
            ValueError: If no ```python ...``` fenced block is present.
                (FIX: the original crashed with an opaque AttributeError on
                `.group` of None in this case.)
        """
        match = re.search(r"```python(.*)```", code, re.DOTALL)
        if match is None:
            raise ValueError("No ```python ... ``` code block found in the response")
        return match.group(1).strip()

    @contextlib.contextmanager
    def capture_output(self):
        """
        Context manager to capture the standard output.

        Yields:
            StringIO: The captured output.
        """
        new_out = StringIO()
        old_out = sys.stdout
        sys.stdout = new_out
        try:
            yield sys.stdout
        finally:
            sys.stdout = old_out

    def execute_code_snippet(self, code):
        """
        Executes the given Python code snippet exactly once, under a timeout.

        FIX: the original ran the snippet TWICE — a first bare `exec` inside the
        timeout, then a second `exec` (for output capture) with NO timeout — so
        side effects happened twice and a slow snippet could hang on the second
        run. It also reported "60 seconds" while the actual limit is 10.

        Parameters:
            code (str): The LLM response containing the code snippet.

        Returns:
            dict: {"printed_output": ..., "variables": ...} on success, or
                {"error": ...} on failure/timeout.
        """
        # Check for dangerous functions and remove them. This is best-effort
        # sanitization of trusted LLM output, not a security boundary.
        dangerous_functions = ['exit', 'quit', 'sys.exit']
        for func in dangerous_functions:
            if func in code:
                print(f"Warning: Removing unsafe '{func}' call from code")
                # Use regex to remove function calls with any arguments
                code = re.sub(rf'{func}\s*\([^)]*\)', 'break', code)

        try:
            execution_code = self.preprocess_code(code)

            local_vars = {}
            # Single execution: capture stdout AND enforce the 10-second limit
            # around the same exec call.
            try:
                with timeout(10):
                    with self.capture_output() as output:
                        exec(execution_code, {}, local_vars)
                printed_output = output.getvalue().strip()
            except TimeoutException:
                print("Error: Code execution exceeded 10 seconds timeout")
                return {"error": "Execution timed out after 10 seconds"}

            # Filter out built-in variables and modules:
            # only the variables used in the code are returned,
            # excluding built-ins (which start with '__') and imported modules.
            used_vars = {k: v for k, v in local_vars.items()
                         if not k.startswith('__') and not isinstance(v, type(sys))}

            return {"printed_output": printed_output, "variables": used_vars}

        except Exception as e:
            print(f"Error executing code: {e}")
            return {"error": str(e)}

    def execute(self, query):
        """
        Generates and executes Python code based on the provided query.

        Parameters:
            query (str): A query describing the desired operation.

        Returns:
            dict: A dictionary containing the executed output, local variables, or any error message.

        Raises:
            ValueError: If the LLM engine was not initialized.
        """

        if not self.llm_engine:
            raise ValueError("LLM engine not initialized. Please provide a valid model_string when initializing the tool.")

        task_description = """
        Given a query, generate a Python code snippet that performs the specified operation on the provided data. Please think step by step. Ensure to break down the process into clear, logical steps. Make sure to print the final result in the generated code snippet with a descriptive message explaining what the output represents. The final output should be presented in the following format:

        ```python
        <code snippet>
        ```
        """
        task_description = task_description.strip()
        full_prompt = f"Task:\n{task_description}\n\nQuery:\n{query}"

        response = self.llm_engine(full_prompt)
        result_or_error = self.execute_code_snippet(response)
        return result_or_error

    def get_metadata(self):
        """
        Returns the metadata for the Python_Code_Generator_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        metadata["require_llm_engine"] = self.require_llm_engine  # NOTE: can be removed if not needed
        return metadata
208
+
209
+
210
if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/python_code_generator
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Python_Code_Generator_Tool.
    # FIX: the original built a default-model instance and immediately threw it
    # away by rebinding `tool`, paying for an extra LLM-engine initialization.
    tool = Python_Code_Generator_Tool(model_string="gpt-4o-mini")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Sample query for generating and executing Python code
    queries = [
        "Given the number list: [1, 2, 3, 4, 5], calculate the sum of all the numbers in the list.",
    ]
    for query in queries:
        print(f"\n###Query: {query}")
        # Execute the tool with the sample query
        try:
            execution = tool.execute(query=query)
            print("\n###Execution Result:", execution)
        except ValueError as e:
            print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/relevant_patch_zoomer/tool.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ from pydantic import BaseModel
4
+ from octotools.tools.base import BaseTool
5
+ from octotools.engine.openai import ChatOpenAI
6
+
7
class PatchZoomerResponse(BaseModel):
    """Structured LLM response: a free-text analysis plus the selected patch letters."""
    # Free-form description of the five regions and the selection rationale.
    analysis: str
    # Letters identifying the chosen regions (expected subset of "A"-"E").
    patch: list[str]
10
+
11
class Relevant_Patch_Zoomer_Tool(BaseTool):
    """Divides an image into five overlapping regions (four quarters plus a
    center region), asks a multimodal LLM which regions are relevant to a
    question, and saves those regions as zoomed-in crops."""

    require_llm_engine = True

    def __init__(self, model_string="gpt-4o"):
        """Initialize the tool and its multimodal LLM engine.

        Parameters:
            model_string (str): Name of the OpenAI model used for region selection.
        """
        super().__init__(
            tool_name="Relevant_Patch_Zoomer_Tool",
            tool_description="A tool that analyzes an image, divides it into 5 regions (4 quarters + center), and identifies the most relevant patches based on a question. The returned patches are zoomed in by a factor of 2.",
            tool_version="1.0.0",
            input_types={
                "image": "str - The path to the image file.",
                "question": "str - The question about the image content.",
            },
            output_type="dict - Contains analysis text and list of saved zoomed patch paths.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.jpg", question="What is the color of the car?")',
                    "description": "Analyze image and return relevant zoomed patches that show the car's color."
                }
            ],
            user_metadata={
                "best_practices": [
                    "It might be helpful to zoom in on the image first to get a better look at the object(s).",
                    "It might be helpful if the question requires a close-up view of the object(s), symbols, texts, etc.",
                    "The tool should be used to provide a high-level analysis first, and then use other tools for fine-grained analysis. For example, you can use Relevant_Patch_Zoomer_Tool first to get a zoomed patch of specific objects, and then use Image_Captioner_Tool to describe the objects in detail."
                ]
            }
        )
        # Maps the letter the LLM returns to a human-readable region name.
        self.matching_dict = {
            "A": "top-left",
            "B": "top-right",
            "C": "bottom-left",
            "D": "bottom-right",
            "E": "center"
        }

        print(f"\nInitializing Patch Zoomer Tool with model: {model_string}")
        self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=True) if model_string else None

    def _save_patch(self, image_path, patch, save_path, zoom_factor=2):
        """Extract one region from the image (with a 10% margin) and save a zoomed copy.

        Parameters:
            image_path (str): Path of the source image.
            patch (str): Region letter, one of "A"-"E".
            save_path (str): Destination path for the zoomed patch.
            zoom_factor (int): Multiplier applied to both patch dimensions.

        Returns:
            str: The path the zoomed patch was written to.

        Raises:
            ValueError: If the image cannot be read from ``image_path``.
        """
        img = cv2.imread(image_path)
        if img is None:
            # BUG FIX: cv2.imread returns None (no exception) on a missing or
            # unreadable file; fail loudly here instead of crashing later with
            # an opaque AttributeError on ``img.shape``.
            raise ValueError(f"Could not read image file: {image_path}")
        height, width = img.shape[:2]

        quarter_h = height // 2
        quarter_w = width // 2

        # Margins are 10% of a quarter, so neighboring patches overlap slightly.
        margin_h = int(quarter_h * 0.1)
        margin_w = int(quarter_w * 0.1)

        # (top-left, bottom-right) pixel corners for each region, clamped to the
        # image bounds. Corners that sit on the image border need no clamping
        # (the original clamped expressions like max(0, 0 - margin) were
        # constant 0 / width / height; simplified here with identical values).
        patch_coords = {
            'A': ((0, 0),
                  (min(width, quarter_w + margin_w), min(height, quarter_h + margin_h))),
            'B': ((max(0, quarter_w - margin_w), 0),
                  (width, min(height, quarter_h + margin_h))),
            'C': ((0, max(0, quarter_h - margin_h)),
                  (min(width, quarter_w + margin_w), height)),
            'D': ((max(0, quarter_w - margin_w), max(0, quarter_h - margin_h)),
                  (width, height)),
            'E': ((max(0, quarter_w // 2 - margin_w), max(0, quarter_h // 2 - margin_h)),
                  (min(width, quarter_w // 2 + quarter_w + margin_w),
                   min(height, quarter_h // 2 + quarter_h + margin_h)))
        }

        (x1, y1), (x2, y2) = patch_coords[patch]
        patch_img = img[y1:y2, x1:x2]

        # Enlarge the crop by the requested factor.
        zoomed_patch = cv2.resize(patch_img,
                                  (patch_img.shape[1] * zoom_factor,
                                   patch_img.shape[0] * zoom_factor),
                                  interpolation=cv2.INTER_LINEAR)

        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        cv2.imwrite(save_path, zoomed_patch)
        return save_path

    def execute(self, image, question, zoom_factor=2):
        """Ask the LLM which regions answer the question, then save zoomed crops.

        Parameters:
            image (str): Path to the image file.
            question (str): Question about the image content.
            zoom_factor (int): Zoom multiplier for the saved patches.

        Returns:
            dict | str | None: ``{"analysis": ..., "patches": [...]}`` on success,
            an error string when no LLM engine is configured, or ``None`` on failure.
        """
        try:
            if not self.llm_engine:
                return "Error: LLM engine not initialized. Please provide a valid model_string."

            # Prepare the prompt describing the five-region layout.
            prompt = f"""
Analyze this image to identify the most relevant region(s) for answering the question:

Question: {question}

The image is divided into 5 regions:
- (A) Top-left quarter
- (B) Top-right quarter
- (C) Bottom-left quarter
- (D) Bottom-right quarter
- (E) Center region (1/4 size, overlapping middle section)

Instructions:
1. First describe what you see in each of the five regions.
2. Then select the most relevant region(s) to answer the question.
3. Choose only the minimum necessary regions - avoid selecting redundant areas that show the same content. For example, if one patch contains the entire object(s), do not select another patch that only shows a part of the same object(s).


Response format:
<analysis>: Describe the image and five patches first. Then analyze the question and select the most relevant patch or list of patches.
<patch>: List of letters (A-E)
"""
            # Read image bytes and send prompt + image to the multimodal engine.
            with open(image, 'rb') as file:
                image_bytes = file.read()
            input_data = [prompt, image_bytes]

            # Structured response parsed into PatchZoomerResponse.
            response = self.llm_engine(input_data, response_format=PatchZoomerResponse)

            # Save each selected patch next to the configured output directory.
            image_dir = os.path.dirname(image)
            image_name = os.path.splitext(os.path.basename(image))[0]

            patch_info = []
            for patch in response.patch:
                patch_name = self.matching_dict[patch]
                save_path = os.path.join(self.output_dir,
                                         f"{image_name}_{patch_name}_zoomed_{zoom_factor}x.png")
                saved_path = self._save_patch(image, patch, save_path, zoom_factor)
                save_path = os.path.abspath(saved_path)
                patch_info.append({
                    "path": save_path,
                    "description": f"The {self.matching_dict[patch]} region of the image: {image}."
                })

            return {
                "analysis": response.analysis,
                "patches": patch_info
            }

        except Exception as e:
            # Best-effort tool: report the failure and return None so the
            # caller can decide how to proceed.
            print(f"Error in patch zooming: {e}")
            return None

    def get_metadata(self):
        """Return the metadata for the Relevant_Patch_Zoomer_Tool."""
        return super().get_metadata()
150
+
151
if __name__ == "__main__":
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/relevant_patch_zoomer
    python tool.py
    """
    # Resolve paths relative to this script so the demo works from any CWD.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Build the tool and route saved patches into a local folder.
    tool = Relevant_Patch_Zoomer_Tool()
    tool.set_custom_output_dir(f"{script_dir}/zoomed_patches")

    # Show the tool's metadata.
    metadata = tool.get_metadata()
    print(metadata)

    # Demo inputs: a sample image and a question about it.
    image_path = os.path.join(script_dir, "examples/car.png")
    question = "What is the color of the car?"

    try:
        result = tool.execute(image=image_path, question=question)
        if result:
            print("\nDetected Patches:")
            for patch in result['patches']:
                print(f"Path: {patch['path']}")
                print(f"Description: {patch['description']}")
                print()
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/text_detector/tool.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # octotools/tools/text_detector/tool.py
2
+
3
+ import os
4
+ import time
5
+ from octotools.tools.base import BaseTool
6
+
7
+ import warnings
8
+ warnings.filterwarnings("ignore")
9
+
10
class Text_Detector_Tool(BaseTool):
    """Detects text in an image using EasyOCR, with retry handling for
    transient failures such as CUDA out-of-memory errors."""

    def __init__(self):
        super().__init__(
            tool_name="Text_Detector_Tool",
            tool_description="A tool that detects text in an image using EasyOCR.",
            tool_version="1.0.0",
            input_types={
                "image": "str - The path to the image file.",
                "languages": "list - A list of language codes for the OCR model.",
                "detail": "int - The level of detail in the output. Set to 0 for simpler output, 1 for detailed output."
            },
            output_type="list - A list of detected text blocks.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", languages=["en"])',
                    "description": "Detect text in an image using the default language (English)."
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", languages=["en", "de"])',
                    "description": "Detect text in an image using multiple languages (English and German)."
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", languages=["en"], detail=0)',
                    "description": "Detect text in an image with simpler output (text without coordinates and scores)."
                },
            ],
            user_metadata={
                "frequently_used_language": {
                    "ch_sim": "Simplified Chinese",
                    "ch_tra": "Traditional Chinese",
                    "de": "German",
                    "en": "English",
                    "es": "Spanish",
                    "fr": "French",
                    "hi": "Hindi",
                    "ja": "Japanese",
                }
            }
        )

    def build_tool(self, languages=None):
        """
        Builds and returns the EasyOCR reader model.

        Parameters:
            languages (list): A list of language codes for the OCR model.

        Returns:
            easyocr.Reader: An initialized EasyOCR Reader object, or None on failure.

        Raises:
            ImportError: If the easyocr package is not installed.
        """
        languages = languages or ["en"]  # Default to English if no languages provided
        try:
            import easyocr
            reader = easyocr.Reader(languages)
            return reader
        except ImportError:
            raise ImportError("Please install the EasyOCR package using 'pip install easyocr'.")
        except Exception as e:
            print(f"Error building the OCR tool: {e}")
            return None

    def execute(self, image, languages=None, max_retries=10, retry_delay=5, clear_cuda_cache=False, **kwargs):
        """
        Executes the OCR tool to detect text in the provided image.

        Parameters:
            image (str): The path to the image file.
            languages (list): A list of language codes for the OCR model.
            max_retries (int): Maximum number of retry attempts.
            retry_delay (int): Delay in seconds between retry attempts.
            clear_cuda_cache (bool): Whether to clear CUDA cache on out-of-memory errors.
            **kwargs: Additional keyword arguments for the OCR reader (e.g. detail=0).

        Returns:
            list: A list of detected text blocks; an empty list if all attempts fail.
        """
        languages = languages or ["en"]

        for attempt in range(max_retries):
            try:
                reader = self.build_tool(languages)
                if reader is None:
                    raise ValueError("Failed to build the OCR tool.")

                result = reader.readtext(image, **kwargs)
                try:
                    # detail = 1: convert numpy coordinate/score types to plain
                    # Python ints/floats so the result is JSON-serializable.
                    cleaned_result = [
                        ([[int(coord[0]), int(coord[1])] for coord in item[0]], item[1], round(float(item[2]), 2))
                        for item in result
                    ]
                    return cleaned_result
                except Exception:
                    # detail = 0: items are plain strings, the conversion above
                    # fails, so return the raw result as-is.
                    return result

            except RuntimeError as e:
                if "CUDA out of memory" in str(e):
                    print(f"CUDA out of memory error on attempt {attempt + 1}.")
                    if clear_cuda_cache:
                        print("Clearing CUDA cache and retrying...")
                        # BUG FIX: ``torch`` was referenced here without being
                        # imported anywhere in this module, so enabling
                        # clear_cuda_cache raised a NameError. Import lazily so
                        # torch is only required when this recovery path runs.
                        import torch
                        torch.cuda.empty_cache()
                    else:
                        print(f"Retrying in {retry_delay} seconds...")
                        time.sleep(retry_delay)
                    continue
                else:
                    print(f"Runtime error: {e}")
                    break
            except Exception as e:
                print(f"Error detecting text: {e}")
                break

        print(f"Failed to detect text after {max_retries} attempts.")
        return []

    def get_metadata(self):
        """
        Returns the metadata for the Text_Detector_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata
135
+
136
if __name__ == "__main__":
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/text_detector
    python tool.py
    """
    import json

    # Resolve the example image relative to this script's location.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Instantiate the tool and show its metadata.
    tool = Text_Detector_Tool()
    metadata = tool.get_metadata()
    print(metadata)

    # English sample image shipped with the tool.
    image_path = os.path.join(script_dir, "examples/english.png")

    try:
        execution = tool.execute(image=image_path, languages=["en"])
        print(json.dumps(execution))

        print("Detected Text:", execution)
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/url_text_extractor/tool.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
+ from octotools.tools.base import BaseTool
6
+
7
class URL_Text_Extractor_Tool(BaseTool):
    """Fetches a URL over HTTP and extracts its visible text with BeautifulSoup."""

    def __init__(self):
        super().__init__(
            tool_name="URL_Text_Extractor_Tool",
            tool_description="A tool that extracts all text from a given URL.",
            tool_version="1.0.0",
            input_types={
                "url": "str - The URL from which to extract text.",
            },
            output_type="dict - A dictionary containing the extracted text and any error messages.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(url="https://example.com")',
                    "description": "Extract all text from the example.com website."
                },
                {
                    "command": 'execution = tool.execute(url="https://en.wikipedia.org/wiki/Python_(programming_language)")',
                    "description": "Extract all text from the Wikipedia page about Python programming language."
                },
            ],
        )

    def extract_text_from_url(self, url, timeout=30):
        """
        Extracts all text from the given URL.

        Parameters:
            url (str): The URL from which to extract text.
            timeout (int): Seconds to wait for the HTTP response before aborting.

        Returns:
            str: The extracted text (truncated to 10000 characters), or an
            error message string on failure.
        """
        # arXiv PDF links point at binary data; redirect to the HTML abstract page.
        url = url.replace("arxiv.org/pdf", "arxiv.org/abs")

        try:
            # BUG FIX: requests.get without a timeout can block indefinitely on
            # an unresponsive server; with a timeout, the failure is caught
            # below and reported as an error string instead.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            text = soup.get_text(separator='\n', strip=True)
            text = text[:10000]  # Limit the text to 10000 characters
            return text
        except requests.RequestException as e:
            return f"Error fetching URL: {str(e)}"
        except Exception as e:
            return f"Error extracting text: {str(e)}"

    def execute(self, url):
        """Fetch ``url`` and return its extracted text in a result dictionary."""
        extracted_text = self.extract_text_from_url(url)
        return {
            "url": url,
            "extracted_text": extracted_text
        }

    def get_metadata(self):
        """
        Returns the metadata for the URL_Text_Extractor_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata
69
+
70
+
71
if __name__ == "__main__":
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/url_text_extractor
    python tool.py
    """
    import json

    # Resolve paths relative to this script (kept for parity with other tools).
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Instantiate the tool and show its metadata.
    tool = URL_Text_Extractor_Tool()
    metadata = tool.get_metadata()
    print(metadata)

    # Sample URL for extracting text.
    url = "https://en.wikipedia.org/wiki/Python_(programming_language)"

    try:
        execution = tool.execute(url=url)
        print("Execution Result:")
        print(json.dumps(execution, indent=4))
        for key, value in execution.items():
            print(f"{key}:\n{value}\n")
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/wikipedia_knowledge_searcher/tool.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import wikipedia
3
+
4
+ from octotools.tools.base import BaseTool
5
+
6
class Wikipedia_Knowledge_Searcher_Tool(BaseTool):
    """Searches Wikipedia for a query and returns the leading article text."""

    def __init__(self):
        super().__init__(
            tool_name="Wikipedia_Knowledge_Searcher_Tool",
            tool_description="A tool that searches Wikipedia and returns web text based on a given query.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query for Wikipedia.", },
            output_type="dict - A dictionary containing the search results, extracted text, and any error messages.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="Python programming language")',
                    "description": "Search Wikipedia for information about Python programming language."
                },
                {
                    "command": 'execution = tool.execute(query="Artificial Intelligence")',
                    "description": "Search Wikipedia for information about Artificial Intelligence"
                },
                {
                    "command": 'execution = tool.execute(query="Theory of Relativity")',
                    "description": "Search Wikipedia for the full article about the Theory of Relativity."
                },
            ],
        )

    def search_wikipedia(self, query, max_length=2000):
        """
        Search Wikipedia for ``query`` and return the top page's content.

        Parameters:
            query (str): The search query for Wikipedia.
            max_length (int): Maximum length of the returned text; -1 for full text.

        Returns:
            tuple: (search_results, page_text) — on errors, page_text carries
            the error description instead.
        """
        try:
            matches = wikipedia.search(query)
            if not matches:
                return [], "No results found for the given query."

            # Fetch the first match and optionally truncate its content.
            page_text = wikipedia.page(matches[0]).content
            if max_length != -1:
                page_text = page_text[:max_length]
            return matches, page_text
        except wikipedia.exceptions.DisambiguationError as e:
            # Ambiguous title: surface the candidate options to the caller.
            return e.options, f"DisambiguationError: {str(e)}"
        except wikipedia.exceptions.PageError:
            return [], f"PageError: No Wikipedia page found for '{query}'."
        except Exception as e:
            return [], f"Error searching Wikipedia: {str(e)}"

    def execute(self, query, max_length=2000):
        """
        Search Wikipedia and return a formatted summary of the results.

        Parameters:
            query (str): The search query for Wikipedia.
            max_length (int): Maximum length of the returned text; -1 for full text.

        Returns:
            dict: {"output": <formatted search results and extracted text>}.
        """
        results, text = self.search_wikipedia(query, max_length)

        # Number the hits (1-based) and append the extracted article text.
        numbered = "\n".join(f"{idx}. {title}" for idx, title in enumerate(results, 1))
        formatted = f"Search results for '{query}':\n{numbered}\n\nExtracted text:\n{text}"

        return {
            "output": formatted
        }

    def get_metadata(self):
        """
        Returns the metadata for the Wikipedia_Knowledge_Searcher_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        return super().get_metadata()
93
+
94
+
95
if __name__ == "__main__":
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/wikipedia_knowledge_searcher
    python tool.py
    """
    import json

    # Resolve paths relative to this script (kept for parity with other tools).
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Instantiate the tool and show its metadata.
    tool = Wikipedia_Knowledge_Searcher_Tool()
    metadata = tool.get_metadata()
    print(metadata)

    # Sample query for searching Wikipedia.
    query = "kidney"

    try:
        execution = tool.execute(query=query)
        print("Execution Result:")
        print(json.dumps(execution, indent=4))
        for key, value in execution.items():
            print(f"{key}:\n{value}\n")
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")