fdaudens HF staff committed on
Commit
799b253
·
verified ·
1 Parent(s): bf90cd3

Update tools/visit_webpage.py

Browse files
Files changed (1) hide show
  1. tools/visit_webpage.py +8 -2
tools/visit_webpage.py CHANGED
@@ -3,6 +3,7 @@ from smolagents.tools import Tool
3
  import requests
4
  import markdownify
5
  import smolagents
 
6
 
7
  class VisitWebpageTool(Tool):
8
  name = "visit_webpage"
@@ -22,9 +23,14 @@ class VisitWebpageTool(Tool):
22
  "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
23
  ) from e
24
  try:
 
 
 
 
 
25
  # Send a GET request to the URL with a 20-second timeout
26
- response = requests.get(url, timeout=20)
27
- response.raise_for_status() # Raise an exception for bad status codes
28
 
29
  # Convert the HTML content to Markdown
30
  markdown_content = markdownify(response.text).strip()
 
3
  import requests
4
  import markdownify
5
  import smolagents
6
+ import re # Add re import here
7
 
8
  class VisitWebpageTool(Tool):
9
  name = "visit_webpage"
 
23
  "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
24
  ) from e
25
  try:
26
+ # Add user agent to avoid some blocking
27
+ headers = {
28
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
29
+ }
30
+
31
  # Send a GET request to the URL with a 20-second timeout
32
+ response = requests.get(url, timeout=20, headers=headers)
33
+ response.raise_for_status()
34
 
35
  # Convert the HTML content to Markdown
36
  markdown_content = markdownify(response.text).strip()