opentools-->octotools; added remaining tools; polished the ui
- octotools/tools/README.md +44 -0
- octotools/tools/__init__.py +0 -0
- octotools/tools/advanced_object_detector/tool.py +236 -0
- octotools/tools/arxiv_paper_searcher/tool.py +165 -0
- octotools/tools/base.py +103 -0
- octotools/tools/generalist_solution_generator/tool.py +144 -0
- octotools/tools/google_search/tool.py +136 -0
- octotools/tools/image_captioner/tool.py +96 -0
- octotools/tools/nature_news_fetcher/tool.py +181 -0
- octotools/tools/object_detector/tool.py +179 -0
- octotools/tools/pubmed_search/tool.py +112 -0
- octotools/tools/python_code_generator/tool.py +243 -0
- octotools/tools/relevant_patch_zoomer/tool.py +188 -0
- octotools/tools/text_detector/tool.py +173 -0
- octotools/tools/url_text_extractor/tool.py +105 -0
- octotools/tools/wikipedia_knowledge_searcher/tool.py +130 -0
octotools/tools/README.md
ADDED
@@ -0,0 +1,44 @@
## Testing the Tools

To test the text detection tool, follow these steps:

1. **Navigate to the Project Directory:**

   Change your current directory to where the tools are located. Replace `your_path` with the actual path to your project directory.

   ```sh
   cd your_path/toolbox-agent/octotools
   ```

2. **Run the Text Detection Tool:**

   From the repository root, add the project to your Python path:

   ```sh
   cd toolbox-agent
   export PYTHONPATH=$(pwd)
   ```

   Execute the tools using the following commands:

   ```sh
   python tools/text_detector/tool.py
   python tools/object_detector/tool.py
   ```

## File Structure

The project is organized as follows:

```sh
├── __init__.py          # Initializes the tools package and possibly exposes submodules
├── base.py              # Base class for tools, providing common functionality
├── text_detector/       # Directory for the text detection tool
│   ├── readme.md        # Documentation for the text detection tool
│   └── tool.py          # Implementation of the text detection tool
├── object_detector/     # Directory for the object detection tool
│   ├── readme.md        # Documentation for the object detection tool
│   └── tool.py          # Implementation of the object detection tool
```
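Beyond running each `tool.py` script directly, every tool added in this commit exposes an `execute` method and can be called from Python once `PYTHONPATH` is set as above. A minimal sketch, mirroring the `__main__` block of `object_detector/tool.py` (the image path below is an assumption; point it at any local image):

```python
# Minimal usage sketch: assumes PYTHONPATH includes the repo root and that
# "examples/baseball.png" exists locally (adjust the path for your setup).
from octotools.tools.object_detector.tool import Object_Detector_Tool

tool = Object_Detector_Tool()
tool.set_custom_output_dir("detected_objects")  # where cropped objects are saved

# Detect the requested labels and save each crop with white padding.
results = tool.execute(image="examples/baseball.png",
                       labels=["baseball", "basket"], padding=20)
for obj in results:
    print(obj["label"], obj["confidence score"], obj["saved_image_path"])
```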
octotools/tools/__init__.py
ADDED
File without changes
octotools/tools/advanced_object_detector/tool.py
ADDED
@@ -0,0 +1,236 @@
# Grounding DINO Object Detection Tool
# https://huggingface.co/IDEA-Research/grounding-dino

import os
import time

from octotools.tools.base import BaseTool
from PIL import Image, ImageOps

# Suppress stderr by redirecting it to /dev/null
import sys
import re
import base64
import requests
sys.stderr = open(os.devnull, 'w')


class Advanced_Object_Detector_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="Advanced_Object_Detector_Tool",
            tool_description="A tool that detects objects in an image using the Grounding DINO-X model and saves individual object images with empty padding.",
            tool_version="1.0.0",
            input_types={
                "image": "str - The path to the image file.",
                "labels": "list - A list of object labels to detect.",
                "threshold": "float - The confidence threshold for detection (default: 0.35).",
                "padding": "int - The number of pixels to add as empty padding around detected objects (default: 20)."
            },
            output_type="list - A list of detected objects with their scores, bounding boxes, and saved image paths.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", labels=["baseball", "basket"])',
                    "description": "Detect baseball and basket in an image, save the detected objects with default empty padding, and return their paths."
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", labels=["car", "person"], threshold=0.5, model_size="base", padding=15)',
                    "description": "Detect car and person in an image using the base model, save the detected objects with 15 pixels of empty padding, and return their paths."
                }
            ],
            user_metadata={
                "limitation": "The model may not always detect objects accurately, and its performance can vary depending on the input image and the associated labels. It typically struggles with detecting small objects, objects that are uncommon, or objects with limited or specific attributes. For improved accuracy or better detection in certain situations, consider using supplementary tools or image processing techniques to provide additional information for verification."
            }
        )
        self.DINO_KEY = os.environ.get("DINO_KEY")

    def preprocess_caption(self, caption):
        result = caption.lower().strip()
        if result.endswith("."):
            return result
        return result + "."

    def build_tool(self, threshold=0.35):
        params_dict = {
            'headers': {
                "Content-Type": "application/json",
                "Token": self.DINO_KEY
            },
            'body': {
                "image": None,
                "prompts": [
                    {"type": "text", "text": None},
                ],
                "bbox_threshold": threshold
            }
        }
        return params_dict

    def save_detected_object(self, image, box, image_name, label, index, padding):
        object_image = image.crop(box)
        padded_image = ImageOps.expand(object_image, border=padding, fill='white')

        filename = f"{image_name}_{label}_{index}.png"
        os.makedirs(self.output_dir, exist_ok=True)
        save_path = os.path.join(self.output_dir, filename)

        padded_image.save(save_path)
        return save_path

    def execute(self, image, labels, threshold=0.35, padding=20, max_retries=10, retry_delay=5):
        retry_count = 0
        params = self.build_tool(threshold)

        def process_image(input_str):

            def image_to_base64(image_path):
                with open(image_path, "rb") as image_file:
                    return base64.b64encode(image_file.read()).decode('utf-8')

            # Define common image file extensions
            image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.tiff', '.webp'}

            # Check if it is a URL
            url_pattern = re.compile(r'^(http|https|ftp)://')
            if url_pattern.match(input_str):
                if input_str.lower().endswith(tuple(image_extensions)):
                    return input_str
                return input_str

            # Check if it is a file path
            _, ext = os.path.splitext(input_str)
            if ext.lower() in image_extensions:
                image_base64 = image_to_base64(input_str)
                return f'data:image/png;base64,{image_base64}'
            return None

        if len(labels) < 1:
            preprocessed_prompt = '<prompt_free>'
        else:
            preprocessed_prompt = ''
            for label in labels:
                preprocessed_prompt += self.preprocess_caption(label)

        body = params['body']
        body['image'] = process_image(image)
        body['prompts'] = [{"type": "text", "text": preprocessed_prompt}]

        # Send the detection request
        resp = requests.post(
            'https://api.deepdataspace.com/tasks/dinox',
            json=body,
            headers=params['headers']
        )

        if resp.status_code == 200:
            json_resp = resp.json()
            print(json_resp)

            # Get the task_uuid
            task_uuid = json_resp["data"]["task_uuid"]
            print(f'task_uuid:{task_uuid}')

            # Poll for the task result until it is no longer waiting/running
            while retry_count < max_retries:
                resp = requests.get(f'https://api.deepdataspace.com/task_statuses/{task_uuid}', headers=params['headers'])

                if resp.status_code != 200:
                    break
                json_resp = resp.json()

                if json_resp["data"]["status"] not in ["waiting", "running"]:
                    break
                time.sleep(1)  # retry_delay
                retry_count += 1

            if json_resp["data"]["status"] == "failed":
                print(f'failed resp: {json_resp}')
            elif json_resp["data"]["status"] == "success":
                # print(f'success resp: {json_resp}')
                formatted_results = []
                original_image = Image.open(image)
                image_name = os.path.splitext(os.path.basename(image))[0]

                object_counts = {}

                for result in json_resp['data']['result']['objects']:
                    box = tuple(result["bbox"])
                    try:
                        box = [int(x) for x in box]
                    except (TypeError, ValueError):
                        continue
                    label = result["category"]
                    score = round(result["score"], 2)
                    if label.endswith("."):
                        label = label[:-1]

                    object_counts[label] = object_counts.get(label, 0) + 1
                    index = object_counts[label]

                    save_path = self.save_detected_object(original_image, box, image_name, label, index, padding)

                    formatted_results.append({
                        "label": label,
                        "confidence score": score,
                        "box": box,
                        "saved_image_path": save_path
                    })

                return formatted_results
            else:
                print(f'get task resp: {resp.status_code} - {resp.text}')
        else:
            print(f'Error: {resp.status_code} - {resp.text}')

        print(f"Failed to detect objects after {max_retries} attempts.")
        return []

    def get_metadata(self):
        metadata = super().get_metadata()
        return metadata

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/advanced_object_detector
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Advanced_Object_Detector_Tool
    tool = Advanced_Object_Detector_Tool()
    tool.set_custom_output_dir("detected_objects")

    # Get tool metadata
    metadata = tool.get_metadata()
    # print(metadata)

    # Construct the full path to the image using the script's directory
    relative_image_path = "examples/baseball.png"
    image_path = os.path.join(script_dir, relative_image_path)

    import json

    # Execute the tool
    try:
        execution = tool.execute(image=image_path, labels=["baseball", "basket"], padding=20)
        print(json.dumps(execution, indent=4))
        print("Detected Objects:")
        for obj in execution:
            print(f"Detected {obj['label']} with confidence {obj['confidence score']}")
            print(f"Bounding box: {obj['box']}")
            print(f"Saved image (with padding): {obj['saved_image_path']}")
            print()
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/arxiv_paper_searcher/tool.py
ADDED
@@ -0,0 +1,165 @@
import re
import requests
from bs4 import BeautifulSoup

from octotools.tools.base import BaseTool

class ArXiv_Paper_Searcher_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="ArXiv_Paper_Searcher_Tool",
            tool_description="A tool that searches arXiv for papers based on a given query.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query for arXiv papers.",
                "size": "int - The number of results per page (25, 50, 100, or 200). If None, use 25.",
                "max_results": "int - The maximum number of papers to return (default: 25). Should be less than or equal to 100."
            },
            output_type="list - A list of dictionaries containing paper information.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="tool agents with large language models")',
                    "description": "Search for papers about tool agents with large language models."
                },
                {
                    "command": 'execution = tool.execute(query="quantum computing", size=100, max_results=50)',
                    "description": "Search for quantum computing papers, with 100 results per page, returning a maximum of 50 papers."
                },
                {
                    "command": 'execution = tool.execute(query="machine learning", max_results=75)',
                    "description": "Search for machine learning papers, returning a maximum of 75 papers."
                },
            ],
            user_metadata={
                "valid_sizes": [25, 50, 100, 200],
                "base_url": "https://arxiv.org/search/"
            }
        )

    def build_tool(self):
        """
        No specific build required for this tool.
        """
        pass

    def execute(self, query, size=None, max_results=25):
        """
        Executes the arXiv search tool to find papers based on the given query.

        Parameters:
            query (str): The search query for arXiv papers.
            size (int): The number of results per page.
            max_results (int): The maximum number of papers to return.

        Returns:
            list: A list of dictionaries containing paper information.
        """
        valid_sizes = self.user_metadata["valid_sizes"]
        base_url = self.user_metadata["base_url"]

        if size is None:
            size = 25
        elif size not in valid_sizes:
            size = min(valid_sizes, key=lambda x: abs(x - size))

        results = []
        start = 0

        max_results = min(max_results, 100)  # NOTE: For traffic reasons, limit to 100 results

        while len(results) < max_results:
            params = {
                "searchtype": "all",
                "query": query,
                "abstracts": "show",
                "order": "",
                "size": str(size),
                "start": str(start)
            }

            try:
                response = requests.get(base_url, params=params)
                soup = BeautifulSoup(response.content, 'html.parser')

                papers = soup.find_all("li", class_="arxiv-result")
                if not papers:
                    break

                for paper in papers:
                    if len(results) >= max_results:
                        break

                    title = paper.find("p", class_="title").text.strip()
                    authors = paper.find("p", class_="authors").text.strip()
                    authors = re.sub(r'^Authors:\s*', '', authors)
                    authors = re.sub(r'\s+', ' ', authors).strip()

                    abstract = paper.find("span", class_="abstract-full").text.strip()
                    abstract = abstract.replace("△ Less", "").strip()

                    link = paper.find("p", class_="list-title").find("a")["href"]

                    results.append({
                        "title": title,
                        "authors": authors,
                        "abstract": abstract,
                        "link": f"{link}"
                    })

                start += size

            except Exception as e:
                print(f"Error searching arXiv: {e}")
                break

        return results[:max_results]

    def get_metadata(self):
        """
        Returns the metadata for the ArXiv_Paper_Searcher_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/arxiv_paper_searcher
    python tool.py
    """

    import json

    print("ArXiv Search Tool Test")

    # Example usage of the ArXiv_Paper_Searcher_Tool
    tool = ArXiv_Paper_Searcher_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print("Tool Metadata:")
    print(metadata)

    # Sample query for searching arXiv
    query = "enhance mathematical reasoning with large language models"
    # Execute the tool
    try:
        execution = tool.execute(query=query, size=50, max_results=10)
        print("\n==>> Execution:")
        print(json.dumps(execution, indent=4))  # Pretty print JSON
        print("\n==>> Search Results:")
        for i, paper in enumerate(execution, 1):
            print(f"{i}. {paper['title']}")
            print(f"   Authors: {paper['authors']}")
            print(f"   Abstract: {paper['abstract'][:2000]}")
            print(f"   Link: {paper['link']}")
            print()
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/base.py
ADDED
@@ -0,0 +1,103 @@
# octotools/tools/base.py

from octotools.engine.openai import ChatOpenAI

class BaseTool:
    """
    A base class for building tool classes that perform specific tasks, such as image processing or text detection.
    """

    require_llm_engine = False  # Default is False; tools that need an LLM should set this to True

    def __init__(self, tool_name=None, tool_description=None, tool_version=None, input_types=None, output_type=None, demo_commands=None, output_dir=None, user_metadata=None, model_string=None):
        """
        Initialize the base tool with optional metadata.

        Parameters:
            tool_name (str): The name of the tool.
            tool_description (str): A description of the tool.
            tool_version (str): The version of the tool.
            input_types (dict): The expected input types for the tool.
            output_type (str): The expected output type for the tool.
            demo_commands (list): A list of example commands for using the tool.
            output_dir (str): The directory where the tool should save its output (optional).
            user_metadata (dict): Additional metadata specific to user needs (optional).
            model_string (str): The model string for the LLM engine (optional, only used if require_llm_engine is True).
        """
        self.tool_name = tool_name
        self.tool_description = tool_description
        self.tool_version = tool_version
        self.input_types = input_types
        self.output_type = output_type
        self.demo_commands = demo_commands
        self.output_dir = output_dir
        self.user_metadata = user_metadata
        self.model_string = model_string

    def set_metadata(self, tool_name, tool_description, tool_version, input_types, output_type, demo_commands, user_metadata=None):
        """
        Set the metadata for the tool.

        Parameters:
            tool_name (str): The name of the tool.
            tool_description (str): A description of the tool.
            tool_version (str): The version of the tool.
            input_types (dict): The expected input types for the tool.
            output_type (str): The expected output type for the tool.
            demo_commands (list): A list of example commands for using the tool.
            user_metadata (dict): Additional metadata specific to user needs (optional).
        """
        self.tool_name = tool_name
        self.tool_description = tool_description
        self.tool_version = tool_version
        self.input_types = input_types
        self.output_type = output_type
        self.demo_commands = demo_commands
        self.user_metadata = user_metadata

    def get_metadata(self):
        """
        Returns the metadata for the tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = {
            "tool_name": self.tool_name,
            "tool_description": self.tool_description,
            "tool_version": self.tool_version,
            "input_types": self.input_types,
            "output_type": self.output_type,
            "demo_commands": self.demo_commands,
            "require_llm_engine": self.require_llm_engine,
        }
        if self.user_metadata:
            metadata["user_metadata"] = self.user_metadata
        return metadata

    def set_custom_output_dir(self, output_dir):
        """
        Set a custom output directory for the tool.

        Parameters:
            output_dir (str): The new output directory path.
        """
        self.output_dir = output_dir

    def set_llm_engine(self, model_string):
        """
        Set the LLM engine for the tool.

        Parameters:
            model_string (str): The model string for the LLM engine.
        """
        self.model_string = model_string

    def execute(self, *args, **kwargs):
        """
        Execute the tool's main functionality. This method should be overridden by subclasses.

        Raises:
            NotImplementedError: If the subclass does not implement this method.
        """
        raise NotImplementedError("Subclasses must implement the execute method.")
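Every tool in this commit follows the same pattern on top of `BaseTool`: pass the tool metadata to `super().__init__` and override `execute`. A minimal sketch of that pattern, assuming the `octotools` package is importable; the `Word_Counter_Tool` below is a hypothetical illustration, not part of this commit:

```python
from octotools.tools.base import BaseTool

class Word_Counter_Tool(BaseTool):  # hypothetical example, not in this commit
    def __init__(self):
        super().__init__(
            tool_name="Word_Counter_Tool",
            tool_description="A tool that counts the words in a text string.",
            tool_version="1.0.0",
            input_types={"text": "str - The text to analyze."},
            output_type="int - The number of words in the text.",
            demo_commands=[{
                "command": 'execution = tool.execute(text="hello world")',
                "description": "Count the words in a short string."
            }],
        )

    def execute(self, text):
        # Override the abstract execute method with the tool's actual logic.
        return len(text.split())

if __name__ == "__main__":
    tool = Word_Counter_Tool()
    print(tool.get_metadata())  # inherited from BaseTool
    print(tool.execute(text="OctoTools base tool example"))
```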
octotools/tools/generalist_solution_generator/tool.py
ADDED
@@ -0,0 +1,144 @@
import os
from octotools.tools.base import BaseTool
from octotools.engine.openai import ChatOpenAI

class Generalist_Solution_Generator_Tool(BaseTool):
    require_llm_engine = True
    require_api_key = True

    def __init__(self, model_string="gpt-4o-mini", api_key=None):
        super().__init__(
            tool_name="Generalist_Solution_Generator_Tool",
            tool_description="A generalized tool that takes query from the user as prompt, and answers the question step by step to the best of its ability. It can also accept an image.",
            tool_version="1.0.0",
            input_types={
                "prompt": "str - The prompt that includes query from the user to guide the agent to generate response (Examples: 'Describe this image in detail').",
                "image": "str - The path to the image file if applicable (default: None).",
            },
            output_type="str - The generated response to the original query prompt",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(prompt="Summarize the following text in a few lines")',
                    "description": "Generate a short summary given the prompt from the user."
                },
                {
                    "command": 'execution = tool.execute(prompt="Explain the mood of this scene.", image="path/to/image1.png")',
                    "description": "Generate a caption focusing on the mood using a specific prompt and image."
                },
                {
                    "command": 'execution = tool.execute(prompt="Give your best coordinate estimate for the pacemaker in the image and return (x1, y1, x2, y2)", image="path/to/image2.png")',
                    "description": "Generate bounding box coordinates given the image and prompt from the user. The format should be (x1, y1, x2, y2)."
                },
                {
                    "command": 'execution = tool.execute(prompt="Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?", image="path/to/image2.png")',
                    "description": "Answer a question step by step given the image."
                }
            ],
            # # version 0 (bowen) (Generalist: %; 6 Tools: %; Generalist + 6 Tools: %)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge. For optimal results: 1) Provide clear, specific prompts. 2) Use it as a starting point for complex tasks, then refine with specialized tools. 3) Verify important information from its responses. 4) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
            # version 2 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 54%)
            user_metadata = {
                "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
                "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
                "1) Provide clear, specific prompts.\n"
                "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
                "3) For complex queries, break them down into subtasks and use the tool multiple times.\n"
                "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
                "5) Verify important information from its responses.\n"
                "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            }
            # # version 6 (Generalist: 70%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            #     "1) Provide clear, specific prompts.\n"
            #     "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            #     "3) For complex queries, break them down into smaller, focused sub-tasks and use the tool multiple times.\n"
            #     "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            #     "5) Verify important information from its responses.\n"
            #     "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
            # # version 8 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            #     "1) Provide clear, specific prompts.\n"
            #     "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            #     "3) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            #     "4) Verify important information from its responses.\n"
            #     "5) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
        )
        self.model_string = model_string
        self.api_key = api_key

    def execute(self, prompt, image=None):

        print(f"\nInitializing Generalist Tool with model: {self.model_string}")
        multimodal = True if image else False
        llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal, api_key=self.api_key)

        try:
            input_data = [prompt]
            if multimodal:
                if not os.path.isfile(image):
                    return "Error: Invalid image file path."
                try:
                    with open(image, 'rb') as file:
                        image_bytes = file.read()
                        input_data.append(image_bytes)
                except Exception as e:
                    return f"Error reading image file: {str(e)}"

                response = llm_engine(input_data)
            else:
                response = llm_engine(input_data[0])
            return response
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def get_metadata(self):
        metadata = super().get_metadata()
        return metadata

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools
    python tools/default/tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    print(f"Script directory: {script_dir}")

    # Example usage of the Generalist_Tool
    tool = Generalist_Solution_Generator_Tool()
    # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o-mini")
    # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory
    relative_image_path = "../../tasks/minitoolbench/data/mathvista_113.png"
    relative_image_path = "examples/mathvista_113.png"
    image_path = os.path.join(script_dir, relative_image_path)
    prompt = "Describe the image in detail."

    # Execute the tool with default prompt
    try:
        execution = tool.execute(prompt=prompt, image=image_path)
        # execution = tool.execute(prompt=prompt)
        print("Generated Response:")
        print(execution)
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/google_search/tool.py
ADDED
@@ -0,0 +1,136 @@
import os
import requests
from typing import List, Dict, Any

from octotools.tools.base import BaseTool

from dotenv import load_dotenv
load_dotenv()

class Google_Search_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="Google_Search_Tool",
            tool_description="A tool that performs Google searches based on a given text query.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query to be used for the Google search.",
                "num_results": "int - The number of search results to return (default: 10).",
            },
            output_type="list - A list of dictionaries containing search result information.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="Python programming")',
                    "description": "Perform a Google search for 'Python programming' and return the default number of results."
                },
                {
                    "command": 'execution = tool.execute(query="Machine learning tutorials", num_results=5)',
                    "description": "Perform a Google search for 'Machine learning tutorials' and return 5 results."
                },
            ],
        )
        self.api_key = os.getenv("GOOGLE_API_KEY")  # NOTE: Replace with your own API key (Ref: https://developers.google.com/custom-search/v1/introduction)
        self.cx = os.getenv("GOOGLE_CX")  # NOTE: Replace with your own custom search engine ID (Ref: https://programmablesearchengine.google.com/controlpanel/all)
        self.base_url = "https://www.googleapis.com/customsearch/v1"

    def google_search(self, query: str, num_results: int = 10) -> Dict[str, Any]:
        """
        Performs a Google search using the provided query.

        Parameters:
            query (str): The search query.
            num_results (int): The number of search results to return.

        Returns:
            Dict[str, Any]: The raw search results from the Google API.
        """
        params = {
            'q': query,
            'key': self.api_key,
            'cx': self.cx,
            'num': num_results
        }

        response = requests.get(self.base_url, params=params)
        return response.json()

    def execute(self, query: str, num_results: int = 10) -> List[Dict[str, Any]]:
        """
        Executes a Google search based on the provided query.

        Parameters:
            query (str): The search query.
            num_results (int): The number of search results to return (default: 10).

        Returns:
            List[Dict[str, Any]]: A list of dictionaries containing search result information.
        """
        if not self.api_key:
            return [{"error": "Google API key is not set. Please set the GOOGLE_API_KEY environment variable."}]

        try:
            results = self.google_search(query, num_results)
            print(results)

            if 'items' in results:
                return [
                    {
                        "title": item['title'],
                        "link": item['link'],
                        "snippet": item['snippet']
                    }
                    for item in results['items']
                ]
            else:
                return [{"error": "No results found."}]
        except Exception as e:
            return [{"error": f"An error occurred: {str(e)}"}]

    def get_metadata(self):
        """
        Returns the metadata for the Google_Search_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata


if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    export GOOGLE_API_KEY=your_api_key_here
    cd octotools/tools/google_search
    python tool.py
    """

    # Example usage of the Google_Search_Tool
    tool = Google_Search_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Execute the tool to perform a Google search
    query = "nobel prize winners in chemistry 2024"
    try:
        execution = tool.execute(query=query, num_results=5)
        print("\nExecution Result:")
        print(f"Search query: {query}")
        print(f"Number of results: {len(execution)}")
        print("\nSearch Results:")
        if "error" in execution[0]:
            print(f"Error: {execution[0]['error']}")
        else:
            for i, item in enumerate(execution, 1):
                print(f"\n{i}. Title: {item['title']}")
                print(f"   URL: {item['link']}")
                print(f"   Snippet: {item['snippet']}")
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/image_captioner/tool.py
ADDED
@@ -0,0 +1,96 @@
import os
from octotools.tools.base import BaseTool
from octotools.engine.openai import ChatOpenAI

class Image_Captioner_Tool(BaseTool):
    require_llm_engine = True

    def __init__(self, model_string="gpt-4o-mini"):
        super().__init__(
            tool_name="Image_Captioner_Tool",
            tool_description="A tool that generates captions for images using OpenAI's multimodal model.",
            tool_version="1.0.0",
            input_types={
                "image": "str - The path to the image file.",
                "prompt": "str - The prompt to guide the image captioning (default: 'Describe this image in detail.').",
            },
            output_type="str - The generated caption for the image.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.png")',
                    "description": "Generate a caption for an image using the default prompt and model."
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", prompt="Explain the mood of this scene.")',
                    "description": "Generate a caption focusing on the mood using a specific prompt and model."
                }
            ],
            user_metadata={
                "limitation": "The Image_Captioner_Tool provides general image descriptions but has limitations: 1) May make mistakes in complex scenes, counting, attribute detection, and understanding object relationships. 2) Might not generate comprehensive captions, especially for images with multiple objects or abstract concepts. 3) Performance varies with image complexity. 4) Struggles with culturally specific or domain-specific content. 5) May overlook details or misinterpret object relationships. For precise descriptions, consider: using it with other tools for context/verification, as an initial step before refinement, or in multi-step processes for ambiguity resolution. Verify critical information with specialized tools or human expertise when necessary."
            },
        )
        print(f"\nInitializing Image Captioner Tool with model: {model_string}")
        self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=True) if model_string else None

    def execute(self, image, prompt="Describe this image in detail."):
        try:
            if not self.llm_engine:
                return "Error: LLM engine not initialized. Please provide a valid model_string."

            input_data = [prompt]

            if image and os.path.isfile(image):
                try:
                    with open(image, 'rb') as file:
                        image_bytes = file.read()
                        input_data.append(image_bytes)
                except Exception as e:
                    return f"Error reading image file: {str(e)}"
            else:
                return "Error: Invalid image file path."

            caption = self.llm_engine(input_data)
            return caption
        except Exception as e:
            return f"Error generating caption: {str(e)}"

    def get_metadata(self):
        metadata = super().get_metadata()
        metadata['require_llm_engine'] = self.require_llm_engine  # NOTE: can be removed if not needed
        return metadata

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/image_captioner
    python tool.py
    """

    import json

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Image_Captioner_Tool
    # tool = Image_Captioner_Tool()
    tool = Image_Captioner_Tool(model_string="gpt-4o")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory
    relative_image_path = "examples/baseball.png"
    image_path = os.path.join(script_dir, relative_image_path)

    # Execute the tool with default prompt
    try:
        execution = tool.execute(image=image_path)
        print("Generated Caption:")
        print(json.dumps(execution, indent=4))
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/nature_news_fetcher/tool.py
ADDED
@@ -0,0 +1,181 @@
import os
import requests
from bs4 import BeautifulSoup
import time

from octotools.tools.base import BaseTool

class Nature_News_Fetcher_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="Nature_News_Fetcher_Tool",
            tool_description="A tool that fetches the latest news articles from Nature.",
            tool_version="1.0.0",
            input_types={
                "num_articles": "int - The number of articles to fetch (default: 100).",
                "max_pages": "int - The maximum number of pages to fetch (default: 5).",
            },
            output_type="list - A list of dictionaries containing information about the latest Nature news articles.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute()',
                    "description": "Fetch the latest 100 news articles from Nature."
                },
                {
                    "command": 'execution = tool.execute(num_articles=50, max_pages=3)',
                    "description": "Fetch the latest 50 news articles from Nature, searching up to 3 pages."
                },
            ],
        )
        self.base_url = "https://www.nature.com/nature/articles"

    def fetch_page(self, page_number):
        """
        Fetches a single page of news articles from Nature's website.

        Parameters:
            page_number (int): The page number to fetch.

        Returns:
            str: The HTML content of the page.
        """
        params = {
            "searchType": "journalSearch",
            "sort": "PubDate",
            "type": "news",
            "page": str(page_number)
        }
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(self.base_url, params=params, headers=headers)
        response.raise_for_status()
        return response.text

    def parse_articles(self, html_content):
        """
        Parses the HTML content and extracts article information.

        Parameters:
            html_content (str): The HTML content of the page.

        Returns:
            list: A list of dictionaries containing article information.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        articles_section = soup.find('section', id='new-article-list')
        if not articles_section:
            return []

        articles = []
        for article in articles_section.find_all('article', class_='c-card'):
            title_elem = article.find('h3', class_='c-card__title')
            title = title_elem.text.strip() if title_elem else "No title found"

            url_elem = title_elem.find('a') if title_elem else None
            url = "https://www.nature.com" + url_elem['href'] if url_elem and 'href' in url_elem.attrs else "No URL found"

            description_elem = article.find('div', {'data-test': 'article-description'})
            description = description_elem.text.strip() if description_elem else "No description available"

            authors_elem = article.find('ul', {'data-test': 'author-list'})
            authors = [author.text.strip() for author in authors_elem.find_all('li')] if authors_elem else ["No authors found"]

            date_elem = article.find('time')
            date = date_elem['datetime'] if date_elem and 'datetime' in date_elem.attrs else "No date found"

            image_elem = article.find('img')
            image_url = image_elem['src'] if image_elem and 'src' in image_elem.attrs else "No image found"

            articles.append({
                'title': title,
                'url': url,
                'description': description,
                'authors': authors,
                'date': date,
                'image_url': image_url
            })

        return articles

    def execute(self, num_articles=100, max_pages=5):
        """
        Fetches the latest news articles from Nature's website.

        Parameters:
            num_articles (int): The number of articles to fetch.
            max_pages (int): The maximum number of pages to fetch.

        Returns:
            list: A list of dictionaries containing article information.
        """
        all_articles = []
        page_number = 1

        try:
            while len(all_articles) < num_articles and page_number <= max_pages:
                html_content = self.fetch_page(page_number)
                page_articles = self.parse_articles(html_content)

                if not page_articles:
                    break  # No more articles found

                all_articles.extend(page_articles)
                page_number += 1
                time.sleep(1)  # Be polite to the server

            return all_articles[:num_articles]
        except Exception as e:
            return [{"error": str(e)}]

    def get_metadata(self):
        """
        Returns the metadata for the Nature_News_Fetcher_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata


if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/nature_news_fetcher
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Nature_News_Fetcher_Tool
    tool = Nature_News_Fetcher_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    import json

    # Execute the tool to fetch the latest 10 articles (for demonstration purposes)
    try:
        execution = tool.execute(num_articles=10, max_pages=1)
        print(json.dumps(execution, indent=4))
        print("\nExecution Result:")
        print(f"Number of articles fetched: {len(execution)}")
        print("\nSample articles:")
        for i, article in enumerate(execution[:10], 1):
            print(f"\n{i}. Title: {article['title']}")
            print(f"   URL: {article['url']}")
            print(f"   Description: {article['description'][:100]}...")  # Show first 100 characters
            print(f"   Authors: {', '.join(article['authors'])}")
            print(f"   Date: {article['date']}")
            print(f"   Image URL: {article['image_url']}")
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/object_detector/tool.py
ADDED
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Grounding DINO Object Detection Tool
|
2 |
+
# https://huggingface.co/IDEA-Research/grounding-dino
|
3 |
+
|
4 |
+
import os
|
5 |
+
import time
|
6 |
+
import torch
|
7 |
+
from transformers import pipeline
|
8 |
+
|
9 |
+
from octotools.tools.base import BaseTool
|
10 |
+
from PIL import Image, ImageOps
|
11 |
+
|
12 |
+
import os
|
13 |
+
# If CUDA_HOME is set, print the value
|
14 |
+
print(os.environ.get('CUDA_HOME', 'CUDA_HOME is not set'))
|
15 |
+
|
16 |
+
# Suppress stderr by redirecting it to /dev/null
|
17 |
+
import sys
|
18 |
+
sys.stderr = open(os.devnull, 'w')
|
19 |
+
|
20 |
+
import warnings
|
21 |
+
warnings.filterwarnings("ignore")
|
22 |
+
|
23 |
+
|
24 |
+
class Object_Detector_Tool(BaseTool):
|
25 |
+
def __init__(self):
|
26 |
+
super().__init__(
|
27 |
+
tool_name="Object_Detector_Tool",
|
28 |
+
tool_description="A tool that detects objects in an image using the Grounding DINO model and saves individual object images with empty padding.",
|
29 |
+
tool_version="1.0.0",
|
30 |
+
input_types={
|
31 |
+
"image": "str - The path to the image file.",
|
32 |
+
"labels": "list - A list of object labels to detect.",
|
33 |
+
"threshold": "float - The confidence threshold for detection (default: 0.35).",
|
34 |
+
"model_size": "str - The size of the model to use ('tiny' or 'base', default: 'tiny').",
|
35 |
+
"padding": "int - The number of pixels to add as empty padding around detected objects (default: 20)."
|
36 |
+
},
|
37 |
+
output_type="list - A list of detected objects with their scores, bounding boxes, and saved image paths.",
|
38 |
+
demo_commands=[
|
39 |
+
{
|
40 |
+
"command": 'execution = tool.execute(image="path/to/image.png", labels=["baseball", "basket"])',
|
41 |
+
"description": "Detect baseball and basket in an image, save the detected objects with default empty padding, and return their paths."
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"command": 'execution = tool.execute(image="path/to/image.png", labels=["car", "person"], threshold=0.5, model_size="base", padding=15)',
|
45 |
+
"description": "Detect car and person in an image using the base model, save the detected objects with 15 pixels of empty padding, and return their paths."
|
46 |
+
}
|
47 |
+
],
|
48 |
+
user_metadata={
|
49 |
+
"limitation": "The model may not always detect objects accurately, and its performance can vary depending on the input image and the associated labels. It typically struggles with detecting small objects, objects that are uncommon, or objects with limited or specific attributes. For improved accuracy or better detection in certain situations, consider using supplementary tools or image processing techniques to provide additional information for verification."
|
50 |
+
}
|
51 |
+
)
|
52 |
+
|
53 |
+
def preprocess_caption(self, caption):
|
54 |
+
result = caption.lower().strip()
|
55 |
+
if result.endswith("."):
|
56 |
+
return result
|
57 |
+
return result + "."
|
58 |
+
|
59 |
+
def build_tool(self, model_size='tiny'):
|
60 |
+
model_name = f"IDEA-Research/grounding-dino-{model_size}"
|
61 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
62 |
+
try:
|
63 |
+
pipe = pipeline(model=model_name, task="zero-shot-object-detection", device=device)
|
64 |
+
return pipe
|
65 |
+
except Exception as e:
|
66 |
+
print(f"Error building the Object Detection tool: {e}")
|
67 |
+
return None
|
68 |
+
|
69 |
+
def save_detected_object(self, image, box, image_name, label, index, padding):
|
70 |
+
object_image = image.crop(box)
|
71 |
+
padded_image = ImageOps.expand(object_image, border=padding, fill='white')
|
72 |
+
|
73 |
+
filename = f"{image_name}_{label}_{index}.png"
|
74 |
+
os.makedirs(self.output_dir, exist_ok=True)
|
75 |
+
save_path = os.path.join(self.output_dir, filename)
|
76 |
+
|
77 |
+
padded_image.save(save_path)
|
78 |
+
return save_path
|
79 |
+
|
80 |
+
def execute(self, image, labels, threshold=0.35, model_size='tiny', padding=20, max_retries=10, retry_delay=5, clear_cuda_cache=False):
|
81 |
+
for attempt in range(max_retries):
|
82 |
+
try:
|
83 |
+
saved_files = []
|
84 |
+
|
85 |
+
pipe = self.build_tool(model_size)
|
86 |
+
if pipe is None:
|
87 |
+
raise ValueError("Failed to build the Object Detection tool.")
|
88 |
+
|
89 |
+
preprocessed_labels = [self.preprocess_caption(label) for label in labels]
|
90 |
+
results = pipe(image, candidate_labels=preprocessed_labels, threshold=threshold)
|
91 |
+
|
92 |
+
formatted_results = []
|
93 |
+
original_image = Image.open(image)
|
94 |
+
image_name = os.path.splitext(os.path.basename(image))[0]
|
95 |
+
|
96 |
+
object_counts = {}
|
97 |
+
|
98 |
+
for result in results:
|
99 |
+
box = tuple(result["box"].values())
|
100 |
+
label = result["label"]
|
101 |
+
score = round(result["score"], 2)
|
102 |
+
if label.endswith("."):
|
103 |
+
label = label[:-1]
|
104 |
+
|
105 |
+
object_counts[label] = object_counts.get(label, 0) + 1
|
106 |
+
index = object_counts[label]
|
107 |
+
|
108 |
+
save_path = self.save_detected_object(original_image, box, image_name, label, index, padding)
|
109 |
+
|
110 |
+
formatted_results.append({
|
111 |
+
"label": label,
|
112 |
+
"confidence score": score,
|
113 |
+
"box": box,
|
114 |
+
"saved_image_path": save_path
|
115 |
+
})
|
116 |
+
|
117 |
+
return formatted_results
|
118 |
+
|
119 |
+
except RuntimeError as e:
|
120 |
+
if "CUDA out of memory" in str(e):
|
121 |
+
print(f"CUDA out of memory error on attempt {attempt + 1}.")
|
122 |
+
if clear_cuda_cache:
|
123 |
+
print("Clearing CUDA cache and retrying...")
|
124 |
+
torch.cuda.empty_cache()
|
125 |
+
else:
|
126 |
+
print(f"Retrying in {retry_delay} seconds...")
|
127 |
+
time.sleep(retry_delay)
|
128 |
+
continue
|
129 |
+
else:
|
130 |
+
print(f"Runtime error: {e}")
|
131 |
+
break
|
132 |
+
except Exception as e:
|
133 |
+
print(f"Error detecting objects: {e}")
|
134 |
+
break
|
135 |
+
|
136 |
+
print(f"Failed to detect objects after {max_retries} attempts.")
|
137 |
+
return []
|
138 |
+
|
139 |
+
def get_metadata(self):
|
140 |
+
metadata = super().get_metadata()
|
141 |
+
return metadata
|
142 |
+
|
143 |
+
if __name__ == "__main__":
|
144 |
+
# Test command:
|
145 |
+
"""
|
146 |
+
Run the following commands in the terminal to test the script:
|
147 |
+
|
148 |
+
cd octotools/tools/object_detector
|
149 |
+
python tool.py
|
150 |
+
"""
|
151 |
+
|
152 |
+
# Get the directory of the current script
|
153 |
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
154 |
+
|
155 |
+
# Example usage of the Object_Detector_Tool
|
156 |
+
tool = Object_Detector_Tool()
|
157 |
+
tool.set_custom_output_dir("detected_objects")
|
158 |
+
|
159 |
+
# Get tool metadata
|
160 |
+
metadata = tool.get_metadata()
|
161 |
+
print(metadata)
|
162 |
+
|
163 |
+
# Construct the full path to the image using the script's directory
|
164 |
+
relative_image_path = "examples/baseball.png"
|
165 |
+
image_path = os.path.join(script_dir, relative_image_path)
|
166 |
+
|
167 |
+
# Execute the tool
|
168 |
+
try:
|
169 |
+
execution = tool.execute(image=image_path, labels=["baseball", "basket"], padding=20)
|
170 |
+
print("Detected Objects:")
|
171 |
+
for obj in execution:
|
172 |
+
print(f"Detected {obj['label']} with confidence {obj['confidence score']}")
|
173 |
+
print(f"Bounding box: {obj['box']}")
|
174 |
+
print(f"Saved image (with padding): {obj['saved_image_path']}")
|
175 |
+
print()
|
176 |
+
except ValueError as e:
|
177 |
+
print(f"Execution failed: {e}")
|
178 |
+
|
179 |
+
print("Done!")
|
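A brief illustration of how `execute` turns a single detection from the zero-shot-object-detection pipeline into the crop box used by `save_detected_object`. This is a sketch for readers of the diff, not part of the commit; the `(xmin, ymin, xmax, ymax)` key order of the `box` dict is an assumption about the Transformers pipeline output.

```python
# Illustrative sketch (not part of the commit). Assumes the pipeline returns
# box coordinates keyed as xmin/ymin/xmax/ymax, in that order.
result = {"score": 0.87, "label": "baseball.", "box": {"xmin": 10, "ymin": 20, "xmax": 110, "ymax": 140}}

box = tuple(result["box"].values())  # (10, 20, 110, 140) - the (left, upper, right, lower) box PIL's crop() expects
label = result["label"][:-1] if result["label"].endswith(".") else result["label"]  # "baseball"
score = round(result["score"], 2)    # 0.87
```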
octotools/tools/pubmed_search/tool.py
ADDED
@@ -0,0 +1,112 @@
import os
import json
from pymed import PubMed
from metapub import PubMedFetcher
from octotools.tools.base import BaseTool
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)

# Suppress stderr by redirecting it to /dev/null
import sys
sys.stderr = open(os.devnull, 'w')

import warnings
warnings.filterwarnings("ignore")


class Pubmed_Search_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="Pubmed_Search_Tool",
            tool_description="A tool that searches PubMed Central to retrieve relevant article abstracts based on a given list of text queries. Use this ONLY if you cannot use the other more specific ontology tools.",
            tool_version="1.0.0",
            input_types={
                "queries": "list[str] - list of query terms for searching PubMed."
            },
            output_type="list - List of items matching the search query. Each item consists of the title, abstract, keywords, and URL of the article. If no results found, a string message is returned.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(queries=["scoliosis", "injury"])',
                    "description": "Search for PubMed articles mentioning 'scoliosis' OR 'injury'."
                },
                {
                    "command": 'execution = tool.execute(queries=["COVID", "vaccine", "occupational health"])',
                    "description": "Search for PubMed articles mentioning 'COVID' OR 'vaccine' OR 'occupational health'."
                }
            ],
            user_metadata={
                'limitations': "Try to use shorter and more general search queries."
            }
        )
        self.pubmed = PubMed(tool="MyTool", email="[email protected]")
        self.fetch = PubMedFetcher()

    @retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(3))
    def search_query(self, query_str, max_results=10):
        return self.pubmed.query(query_str, max_results=max_results)

    def execute(self, queries, max_results=10):
        try:
            query_str = f"({'[Title/Abstract] OR '.join(queries) + '[Title/Abstract]'}) AND hasabstract[All Fields] AND fha[Filter]"
            max_results = min(max_results, 50)

            results = self.search_query(query_str, max_results=max_results)  # API can only get most recent

            items = []
            for article in results:
                try:
                    article = json.loads(article.toJSON())
                    pubmed_id = article['pubmed_id']  # get id using pymed then get content using metapub

                    article = self.fetch.article_by_pmid(pubmed_id)
                    items.append({
                        'title': article.title,
                        'abstract': article.abstract,
                        'keywords': article.keywords,
                        'url': article.url
                    })
                except:
                    continue

            if len(items) == 0:
                return "No results found for search query. Try another query or tool."

            return items

        except Exception as e:
            print(f"Error searching PubMed: {e}")
            return []

    def get_metadata(self):
        metadata = super().get_metadata()
        return metadata

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/pubmed_search
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage
    tool = Pubmed_Search_Tool()

    # Queries
    queries = ["COVID occupational health"]

    # Execute the tool
    try:
        execution = tool.execute(queries=queries)
        print(execution)
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")
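For readers of the diff, a minimal sketch (not part of the commit) of what the PubMed query string built in `Pubmed_Search_Tool.execute` expands to for a sample input:

```python
# Minimal sketch (not part of the commit): expansion of the query string.
queries = ["scoliosis", "injury"]
query_str = f"({'[Title/Abstract] OR '.join(queries) + '[Title/Abstract]'}) AND hasabstract[All Fields] AND fha[Filter]"
print(query_str)
# (scoliosis[Title/Abstract] OR injury[Title/Abstract]) AND hasabstract[All Fields] AND fha[Filter]
```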
octotools/tools/python_code_generator/tool.py
ADDED
@@ -0,0 +1,243 @@
# octotools/tools/python_code_generator/tool.py

import os
import re
import sys
from io import StringIO
import contextlib


from octotools.tools.base import BaseTool
from octotools.engine.openai import ChatOpenAI

import signal
from contextlib import contextmanager

# Custom exception for code execution timeout
class TimeoutException(Exception):
    pass

# Custom context manager for code execution timeout
@contextmanager
def timeout(seconds):
    def timeout_handler(signum, frame):
        raise TimeoutException("Code execution timed out")

    # Set the timeout handler
    original_handler = signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(seconds)

    try:
        yield
    finally:
        # Restore the original handler and disable the alarm
        signal.alarm(0)
        signal.signal(signal.SIGALRM, original_handler)


class Python_Code_Generator_Tool(BaseTool):
    require_llm_engine = True

    def __init__(self, model_string="gpt-4o-mini"):
        super().__init__(
            tool_name="Python_Code_Generator_Tool",
            tool_description="A tool that generates and executes simple Python code snippets for basic arithmetical calculations and math-related problems. The generated code runs in a highly restricted environment with only basic mathematical operations available.",
            tool_version="1.0.0",
            input_types={
                "query": "str - A clear, specific description of the arithmetic calculation or math problem to be solved, including any necessary numerical inputs."},
            output_type="dict - A dictionary containing the generated code, calculation result, and any error messages.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="Calculate the factorial of 5")',
                    "description": "Generate a Python code snippet to calculate the factorial of 5."
                },
                {
                    "command": 'execution = tool.execute(query="Find the sum of prime numbers up to 50")',
                    "description": "Generate a Python code snippet to find the sum of prime numbers up to 50."
                },
                {
                    "command": 'query="Given the list [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], calculate the sum of squares of odd numbers"\nexecution = tool.execute(query=query)',
                    "description": "Generate a Python function for a specific mathematical operation on a given list of numbers."
                },
            ],
            user_metadata = {
                "limitations": [
                    "Restricted to basic Python arithmetic operations and built-in mathematical functions.",
                    "Cannot use any external libraries or modules, including those in the Python standard library.",
                    "Limited to simple mathematical calculations and problems.",
                    "Cannot perform any string processing, data structure manipulation, or complex algorithms.",
                    "No access to any system resources, file operations, or network requests.",
                    "Cannot use 'import' statements.",
                    "All calculations must be self-contained within a single function or script.",
                    "Input must be provided directly in the query string.",
                    "Output is limited to numerical results or simple lists/tuples of numbers."
                ],
                "best_practices": [
                    "Provide clear and specific queries that describe the desired mathematical calculation.",
                    "Include all necessary numerical inputs directly in the query string.",
                    "Keep tasks focused on basic arithmetic, algebraic calculations, or simple mathematical algorithms.",
                    "Ensure all required numerical data is included in the query.",
                    "Verify that the query only involves mathematical operations and does not require any data processing or complex algorithms.",
                    "Review generated code to ensure it only uses basic Python arithmetic operations and built-in math functions."
                ]
            }
        )
        print(f"\nInitializing Python_Code_Generator_Tool with model_string: {model_string}")
        self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=False) if model_string else None

    @staticmethod
    def preprocess_code(code):
        """
        Preprocesses the generated code snippet by extracting it from the response.

        Parameters:
            code (str): The response containing the code snippet.

        Returns:
            str: The extracted code snippet.
        """
        code = re.search(r"```python(.*)```", code, re.DOTALL).group(1).strip()
        return code

    @contextlib.contextmanager
    def capture_output(self):
        """
        Context manager to capture the standard output.

        Yields:
            StringIO: The captured output.
        """
        new_out = StringIO()
        old_out = sys.stdout
        sys.stdout = new_out
        try:
            yield sys.stdout
        finally:
            sys.stdout = old_out

    def execute_code_snippet(self, code):
        """
        Executes the given Python code snippet.

        Parameters:
            code (str): The Python code snippet to be executed.

        Returns:
            dict: A dictionary containing the printed output and local variables.
        """
        # Check for dangerous functions and remove them
        dangerous_functions = ['exit', 'quit', 'sys.exit']
        for func in dangerous_functions:
            if func in code:
                print(f"Warning: Removing unsafe '{func}' call from code")
                # Use regex to remove function calls with any arguments
                code = re.sub(rf'{func}\s*\([^)]*\)', 'break', code)

        try:
            execution_code = self.preprocess_code(code)

            # Execute with 10-second timeout
            with timeout(10):
                try:
                    exec(execution_code)
                except TimeoutException:
                    print("Error: Code execution exceeded the 10-second timeout")
                    return {"error": "Execution timed out after 10 seconds"}
                except Exception as e:
                    print(f"Error executing code: {e}")
                    return {"error": str(e)}

            # Capture the output and local variables
            local_vars = {}
            with self.capture_output() as output:
                exec(execution_code, {}, local_vars)
            printed_output = output.getvalue().strip()

            # Filter out built-in variables and modules
            """
            only the variables used in the code are returned,
            excluding built-in variables (which start with '__') and imported modules.
            """
            used_vars = {k: v for k, v in local_vars.items()
                         if not k.startswith('__') and not isinstance(v, type(sys))}

            return {"printed_output": printed_output, "variables": used_vars}

        except Exception as e:
            print(f"Error executing code: {e}")
            return {"error": str(e)}

    def execute(self, query):
        """
        Generates and executes Python code based on the provided query.

        Parameters:
            query (str): A query describing the desired operation.

        Returns:
            dict: A dictionary containing the executed output, local variables, or any error message.
        """

        if not self.llm_engine:
            raise ValueError("LLM engine not initialized. Please provide a valid model_string when initializing the tool.")

        task_description = """
        Given a query, generate a Python code snippet that performs the specified operation on the provided data. Please think step by step. Ensure to break down the process into clear, logical steps. Make sure to print the final result in the generated code snippet with a descriptive message explaining what the output represents. The final output should be presented in the following format:

        ```python
        <code snippet>
        ```
        """
        task_description = task_description.strip()
        full_prompt = f"Task:\n{task_description}\n\nQuery:\n{query}"

        response = self.llm_engine(full_prompt)
        result_or_error = self.execute_code_snippet(response)
        return result_or_error

    def get_metadata(self):
        """
        Returns the metadata for the Python_Code_Generator_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        metadata["require_llm_engine"] = self.require_llm_engine  # NOTE: can be removed if not needed
        return metadata


if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/python_code_generator
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Python_Code_Generator_Tool
    tool = Python_Code_Generator_Tool()
    tool = Python_Code_Generator_Tool(model_string="gpt-4o-mini")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Sample query for generating and executing Python code
    queries = [
        "Given the number list: [1, 2, 3, 4, 5], calculate the sum of all the numbers in the list.",
    ]
    for query in queries:
        print(f"\n###Query: {query}")
        # Execute the tool with the sample query
        try:
            execution = tool.execute(query=query)
            print("\n###Execution Result:", execution)
        except ValueError as e:
            print(f"Execution failed: {e}")

    print("Done!")
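A minimal sketch (not part of the commit) of how `preprocess_code` extracts the snippet from a fenced LLM response before it is executed; the sample response text is made up:

```python
# Minimal sketch (not part of the commit): extracting the fenced snippet.
import re

fence = "`" * 3  # build the literal ``` marker for the sample response
response = f"Here is the code:\n{fence}python\nresult = sum([1, 2, 3, 4, 5])\nprint(result)\n{fence}"

code = re.search(r"```python(.*)```", response, re.DOTALL).group(1).strip()
print(code)
# result = sum([1, 2, 3, 4, 5])
# print(result)
```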
octotools/tools/relevant_patch_zoomer/tool.py
ADDED
@@ -0,0 +1,188 @@
import os
import cv2
from pydantic import BaseModel
from octotools.tools.base import BaseTool
from octotools.engine.openai import ChatOpenAI

class PatchZoomerResponse(BaseModel):
    analysis: str
    patch: list[str]

class Relevant_Patch_Zoomer_Tool(BaseTool):
    require_llm_engine = True

    def __init__(self, model_string="gpt-4o"):
        super().__init__(
            tool_name="Relevant_Patch_Zoomer_Tool",
            tool_description="A tool that analyzes an image, divides it into 5 regions (4 quarters + center), and identifies the most relevant patches based on a question. The returned patches are zoomed in by a factor of 2.",
            tool_version="1.0.0",
            input_types={
                "image": "str - The path to the image file.",
                "question": "str - The question about the image content.",
            },
            output_type="dict - Contains analysis text and list of saved zoomed patch paths.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.jpg", question="What is the color of the car?")',
                    "description": "Analyze image and return relevant zoomed patches that show the car's color."
                }
            ],
            user_metadata = {
                "best_practices": [
                    "It might be helpful to zoom in on the image first to get a better look at the object(s).",
                    "It might be helpful if the question requires a close-up view of the object(s), symbols, texts, etc.",
                    "The tool should be used to provide a high-level analysis first, and then use other tools for fine-grained analysis. For example, you can use Relevant_Patch_Zoomer_Tool first to get a zoomed patch of specific objects, and then use Image_Captioner_Tool to describe the objects in detail."
                ]
            }
        )
        self.matching_dict = {
            "A": "top-left",
            "B": "top-right",
            "C": "bottom-left",
            "D": "bottom-right",
            "E": "center"
        }

        print(f"\nInitializing Patch Zoomer Tool with model: {model_string}")
        self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=True) if model_string else None

    def _save_patch(self, image_path, patch, save_path, zoom_factor=2):
        """Extract and save a specific patch from the image with 10% margins."""
        img = cv2.imread(image_path)
        height, width = img.shape[:2]

        quarter_h = height // 2
        quarter_w = width // 2

        margin_h = int(quarter_h * 0.1)
        margin_w = int(quarter_w * 0.1)

        patch_coords = {
            'A': ((max(0, 0 - margin_w), max(0, 0 - margin_h)),
                  (min(width, quarter_w + margin_w), min(height, quarter_h + margin_h))),
            'B': ((max(0, quarter_w - margin_w), max(0, 0 - margin_h)),
                  (min(width, width + margin_w), min(height, quarter_h + margin_h))),
            'C': ((max(0, 0 - margin_w), max(0, quarter_h - margin_h)),
                  (min(width, quarter_w + margin_w), min(height, height + margin_h))),
            'D': ((max(0, quarter_w - margin_w), max(0, quarter_h - margin_h)),
                  (min(width, width + margin_w), min(height, height + margin_h))),
            'E': ((max(0, quarter_w//2 - margin_w), max(0, quarter_h//2 - margin_h)),
                  (min(width, quarter_w//2 + quarter_w + margin_w),
                   min(height, quarter_h//2 + quarter_h + margin_h)))
        }

        (x1, y1), (x2, y2) = patch_coords[patch]
        patch_img = img[y1:y2, x1:x2]

        zoomed_patch = cv2.resize(patch_img,
                                  (patch_img.shape[1] * zoom_factor,
                                   patch_img.shape[0] * zoom_factor),
                                  interpolation=cv2.INTER_LINEAR)

        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        cv2.imwrite(save_path, zoomed_patch)
        return save_path

    def execute(self, image, question, zoom_factor=2):
        try:
            if not self.llm_engine:
                return "Error: LLM engine not initialized. Please provide a valid model_string."

            # Prepare the prompt
            prompt = f"""
Analyze this image to identify the most relevant region(s) for answering the question:

Question: {question}

The image is divided into 5 regions:
- (A) Top-left quarter
- (B) Top-right quarter
- (C) Bottom-left quarter
- (D) Bottom-right quarter
- (E) Center region (1/4 size, overlapping middle section)

Instructions:
1. First describe what you see in each of the five regions.
2. Then select the most relevant region(s) to answer the question.
3. Choose only the minimum necessary regions - avoid selecting redundant areas that show the same content. For example, if one patch contains the entire object(s), do not select another patch that only shows a part of the same object(s).


Response format:
<analysis>: Describe the image and five patches first. Then analyze the question and select the most relevant patch or list of patches.
<patch>: List of letters (A-E)
"""
            # Read image and create input data
            with open(image, 'rb') as file:
                image_bytes = file.read()
            input_data = [prompt, image_bytes]

            # Get response from LLM
            response = self.llm_engine(input_data, response_format=PatchZoomerResponse)

            # Save patches
            image_dir = os.path.dirname(image)
            image_name = os.path.splitext(os.path.basename(image))[0]

            # Update the return structure
            patch_info = []
            for patch in response.patch:
                patch_name = self.matching_dict[patch]
                save_path = os.path.join(self.output_dir,
                                         f"{image_name}_{patch_name}_zoomed_{zoom_factor}x.png")
                saved_path = self._save_patch(image, patch, save_path, zoom_factor)
                save_path = os.path.abspath(saved_path)
                patch_info.append({
                    "path": save_path,
                    "description": f"The {self.matching_dict[patch]} region of the image: {image}."
                })

            return {
                "analysis": response.analysis,
                "patches": patch_info
            }

        except Exception as e:
            print(f"Error in patch zooming: {e}")
            return None

    def get_metadata(self):
        return super().get_metadata()

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/relevant_patch_zoomer
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Relevant_Patch_Zoomer_Tool
    tool = Relevant_Patch_Zoomer_Tool()
    tool.set_custom_output_dir(f"{script_dir}/zoomed_patches")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory
    relative_image_path = "examples/car.png"
    image_path = os.path.join(script_dir, relative_image_path)
    question = "What is the color of the car?"

    # Execute the tool
    try:
        result = tool.execute(image=image_path, question=question)
        if result:
            print("\nDetected Patches:")
            for patch in result['patches']:
                print(f"Path: {patch['path']}")
                print(f"Description: {patch['description']}")
                print()
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
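A worked example (not part of the commit) of the five patch boxes `_save_patch` computes, for a hypothetical 800x600 image; the numbers follow directly from the quarter-plus-10%-margin arithmetic above:

```python
# Worked example (not part of the commit) for a hypothetical 800x600 image.
width, height = 800, 600
quarter_w, quarter_h = width // 2, height // 2                    # 400, 300
margin_w, margin_h = int(quarter_w * 0.1), int(quarter_h * 0.1)   # 40, 30

# Resulting (x1, y1)-(x2, y2) boxes, clamped to the image bounds:
#   A (top-left):     (0,   0)   - (440, 330)
#   B (top-right):    (360, 0)   - (800, 330)
#   C (bottom-left):  (0,   270) - (440, 600)
#   D (bottom-right): (360, 270) - (800, 600)
#   E (center):       (160, 120) - (640, 480)
```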
octotools/tools/text_detector/tool.py
ADDED
@@ -0,0 +1,173 @@
# octotools/tools/text_detector/tool.py

import os
import time
import torch  # needed for torch.cuda.empty_cache() when clear_cuda_cache is enabled
from octotools.tools.base import BaseTool

import warnings
warnings.filterwarnings("ignore")

class Text_Detector_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="Text_Detector_Tool",
            tool_description="A tool that detects text in an image using EasyOCR.",
            tool_version="1.0.0",
            input_types={
                "image": "str - The path to the image file.",
                "languages": "list - A list of language codes for the OCR model.",
                "detail": "int - The level of detail in the output. Set to 0 for simpler output, 1 for detailed output."
            },
            output_type="list - A list of detected text blocks.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", languages=["en"])',
                    "description": "Detect text in an image using the default language (English)."
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", languages=["en", "de"])',
                    "description": "Detect text in an image using multiple languages (English and German)."
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", languages=["en"], detail=0)',
                    "description": "Detect text in an image with simpler output (text without coordinates and scores)."
                },
            ],
            user_metadata={
                "frequently_used_language": {
                    "ch_sim": "Simplified Chinese",
                    "ch_tra": "Traditional Chinese",
                    "de": "German",
                    "en": "English",
                    "es": "Spanish",
                    "fr": "French",
                    "hi": "Hindi",
                    "ja": "Japanese",
                }
            }
        )

    def build_tool(self, languages=None):
        """
        Builds and returns the EasyOCR reader model.

        Parameters:
            languages (list): A list of language codes for the OCR model.

        Returns:
            easyocr.Reader: An initialized EasyOCR Reader object.
        """
        languages = languages or ["en"]  # Default to English if no languages provided
        try:
            import easyocr
            reader = easyocr.Reader(languages)
            return reader
        except ImportError:
            raise ImportError("Please install the EasyOCR package using 'pip install easyocr'.")
        except Exception as e:
            print(f"Error building the OCR tool: {e}")
            return None

    def execute(self, image, languages=None, max_retries=10, retry_delay=5, clear_cuda_cache=False, **kwargs):
        """
        Executes the OCR tool to detect text in the provided image.

        Parameters:
            image (str): The path to the image file.
            languages (list): A list of language codes for the OCR model.
            max_retries (int): Maximum number of retry attempts.
            retry_delay (int): Delay in seconds between retry attempts.
            clear_cuda_cache (bool): Whether to clear CUDA cache on out-of-memory errors.
            **kwargs: Additional keyword arguments for the OCR reader.

        Returns:
            list: A list of detected text blocks.
        """
        languages = languages or ["en"]

        for attempt in range(max_retries):
            try:
                reader = self.build_tool(languages)
                if reader is None:
                    raise ValueError("Failed to build the OCR tool.")

                result = reader.readtext(image, **kwargs)
                try:
                    # detail = 1: Convert numpy types to standard Python types
                    cleaned_result = [
                        ([[int(coord[0]), int(coord[1])] for coord in item[0]], item[1], round(float(item[2]), 2))
                        for item in result
                    ]
                    return cleaned_result
                except Exception as e:
                    # detail = 0
                    return result

            except RuntimeError as e:
                if "CUDA out of memory" in str(e):
                    print(f"CUDA out of memory error on attempt {attempt + 1}.")
                    if clear_cuda_cache:
                        print("Clearing CUDA cache and retrying...")
                        torch.cuda.empty_cache()
                    else:
                        print(f"Retrying in {retry_delay} seconds...")
                        time.sleep(retry_delay)
                    continue
                else:
                    print(f"Runtime error: {e}")
                    break
            except Exception as e:
                print(f"Error detecting text: {e}")
                break

        print(f"Failed to detect text after {max_retries} attempts.")
        return []

    def get_metadata(self):
        """
        Returns the metadata for the Text_Detector_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/text_detector
    python tool.py
    """
    import json

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Text_Detector_Tool
    tool = Text_Detector_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory
    # relative_image_path = "examples/chinese_tra.jpg"
    # relative_image_path = "examples/chinese.jpg"
    relative_image_path = "examples/english.png"
    image_path = os.path.join(script_dir, relative_image_path)

    # Execute the tool
    try:
        # execution = tool.execute(image=image_path, languages=["en", "ch_sim"])
        # execution = tool.execute(image=image_path, languages=["en", "ch_tra"])
        execution = tool.execute(image=image_path, languages=["en"])
        print(json.dumps(execution))

        print("Detected Text:", execution)
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")
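A small sketch (not part of the commit) of the shape of one cleaned detection returned by `Text_Detector_Tool.execute` at the default detail level; the values are invented for illustration:

```python
# Sketch (not part of the commit): one entry of the cleaned result list.
detection = (
    [[10, 12], [120, 12], [120, 40], [10, 40]],  # four corner points of the text box
    "Hello world",                               # recognized text
    0.93,                                        # confidence score, rounded to 2 decimals
)
```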
octotools/tools/url_text_extractor/tool.py
ADDED
@@ -0,0 +1,105 @@
import os
import requests
from bs4 import BeautifulSoup

from octotools.tools.base import BaseTool

class URL_Text_Extractor_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="URL_Text_Extractor_Tool",
            tool_description="A tool that extracts all text from a given URL.",
            tool_version="1.0.0",
            input_types={
                "url": "str - The URL from which to extract text.",
            },
            output_type="dict - A dictionary containing the extracted text and any error messages.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(url="https://example.com")',
                    "description": "Extract all text from the example.com website."
                },
                {
                    "command": 'execution = tool.execute(url="https://en.wikipedia.org/wiki/Python_(programming_language)")',
                    "description": "Extract all text from the Wikipedia page about Python programming language."
                },
            ],
        )

    def extract_text_from_url(self, url):
        """
        Extracts all text from the given URL.

        Parameters:
            url (str): The URL from which to extract text.

        Returns:
            str: The extracted text.
        """
        url = url.replace("arxiv.org/pdf", "arxiv.org/abs")

        try:
            response = requests.get(url)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            text = soup.get_text(separator='\n', strip=True)
            text = text[:10000]  # Limit the text to 10000 characters
            return text
        except requests.RequestException as e:
            return f"Error fetching URL: {str(e)}"
        except Exception as e:
            return f"Error extracting text: {str(e)}"

    def execute(self, url):
        extracted_text = self.extract_text_from_url(url)
        return {
            "url": url,
            "extracted_text": extracted_text
        }

    def get_metadata(self):
        """
        Returns the metadata for the URL_Text_Extractor_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata


if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/url_text_extractor
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the URL_Text_Extractor_Tool
    tool = URL_Text_Extractor_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Sample URL for extracting text
    url = "https://en.wikipedia.org/wiki/Python_(programming_language)"

    import json

    # Execute the tool with the sample URL
    try:
        execution = tool.execute(url=url)
        print("Execution Result:")
        print(json.dumps(execution, indent=4))
        for key, value in execution.items():
            print(f"{key}:\n{value}\n")
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")
octotools/tools/wikipedia_knowledge_searcher/tool.py
ADDED
@@ -0,0 +1,130 @@
import os
import wikipedia

from octotools.tools.base import BaseTool

class Wikipedia_Knowledge_Searcher_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="Wikipedia_Knowledge_Searcher_Tool",
            tool_description="A tool that searches Wikipedia and returns web text based on a given query.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query for Wikipedia.",
            },
            output_type="dict - A dictionary containing the search results, extracted text, and any error messages.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="Python programming language")',
                    "description": "Search Wikipedia for information about Python programming language."
                },
                {
                    "command": 'execution = tool.execute(query="Artificial Intelligence")',
                    "description": "Search Wikipedia for information about Artificial Intelligence"
                },
                {
                    "command": 'execution = tool.execute(query="Theory of Relativity")',
                    "description": "Search Wikipedia for the full article about the Theory of Relativity."
                },
            ],
        )

    def search_wikipedia(self, query, max_length=2000):
        """
        Searches Wikipedia based on the given query and returns the text.

        Parameters:
            query (str): The search query for Wikipedia.
            max_length (int): The maximum length of the returned text. Use -1 for full text.

        Returns:
            tuple: (search_results, page_text)
        """
        try:
            search_results = wikipedia.search(query)
            if not search_results:
                return [], "No results found for the given query."

            page = wikipedia.page(search_results[0])
            text = page.content

            if max_length != -1:
                text = text[:max_length]

            return search_results, text
        except wikipedia.exceptions.DisambiguationError as e:
            return e.options, f"DisambiguationError: {str(e)}"
        except wikipedia.exceptions.PageError:
            return [], f"PageError: No Wikipedia page found for '{query}'."
        except Exception as e:
            return [], f"Error searching Wikipedia: {str(e)}"

    def execute(self, query, max_length=2000):
        """
        Searches Wikipedia based on the provided query and returns the results.

        Parameters:
            query (str): The search query for Wikipedia.
            max_length (int): The maximum length of the returned text. Use -1 for full text.

        Returns:
            dict: A dictionary containing the search results, extracted text, and formatted output.
        """
        search_results, text = self.search_wikipedia(query, max_length)

        formatted_output = f"Search results for '{query}':\n"
        formatted_output += "\n".join(f"{i}. {result}" for i, result in enumerate(search_results, 1))
        formatted_output += f"\n\nExtracted text:\n{text}"

        return {
            # "search_results": search_results,
            # "extracted_text": text,
            "output": formatted_output
        }

    def get_metadata(self):
        """
        Returns the metadata for the Wikipedia_Knowledge_Searcher_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata


if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/wikipedia_knowledge_searcher
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Wikipedia_Knowledge_Searcher_Tool
    tool = Wikipedia_Knowledge_Searcher_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Sample query for searching Wikipedia
    # query = "Python programming language"
    query = "kidney"

    import json

    # Execute the tool with the sample query
    try:
        execution = tool.execute(query=query)
        print("Execution Result:")
        print(json.dumps(execution, indent=4))
        for key, value in execution.items():
            print(f"{key}:\n{value}\n")
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")