Spaces:
Running
Running
Update tools/visit_webpage.py
Browse files
- tools/visit_webpage.py +8 -2
tools/visit_webpage.py
CHANGED
@@ -3,6 +3,7 @@ from smolagents.tools import Tool
|
|
3 |
import requests
|
4 |
import markdownify
|
5 |
import smolagents
|
|
|
6 |
|
7 |
class VisitWebpageTool(Tool):
|
8 |
name = "visit_webpage"
|
@@ -22,9 +23,14 @@ class VisitWebpageTool(Tool):
|
|
22 |
"You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
|
23 |
) from e
|
24 |
try:
|
|
|
|
|
|
|
|
|
|
|
25 |
# Send a GET request to the URL with a 20-second timeout
|
26 |
-
response = requests.get(url, timeout=20)
|
27 |
-
response.raise_for_status()
|
28 |
|
29 |
# Convert the HTML content to Markdown
|
30 |
markdown_content = markdownify(response.text).strip()
|
|
|
3 |
import requests
|
4 |
import markdownify
|
5 |
import smolagents
|
6 |
+
import re # Add re import here
|
7 |
|
8 |
class VisitWebpageTool(Tool):
|
9 |
name = "visit_webpage"
|
|
|
23 |
"You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
|
24 |
) from e
|
25 |
try:
|
26 |
+
# Send a browser-like User-Agent header, since some sites block the default python-requests agent
|
27 |
+
headers = {
|
28 |
+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
29 |
+
}
|
30 |
+
|
31 |
# Send a GET request to the URL with a 20-second timeout
|
32 |
+
response = requests.get(url, timeout=20, headers=headers)
|
33 |
+
response.raise_for_status()
|
34 |
|
35 |
# Convert the HTML content to Markdown
|
36 |
markdown_content = markdownify(response.text).strip()
|