fdaudens committed (verified) · Commit bf90cd3 · Parent(s): 81db657

Update app.py

Files changed (1):
  1. app.py +146 -42
app.py CHANGED
@@ -1,51 +1,155 @@
-from typing import Any, Optional
-from smolagents.tools import Tool
-import requests
-import markdownify
-import smolagents
-import re  # Add re import here
-
-class VisitWebpageTool(Tool):
-    name = "visit_webpage"
-    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
-    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
-    output_type = "string"
-
-    def forward(self, url: str) -> str:
-        try:
-            import requests
-            from markdownify import markdownify
-            from requests.exceptions import RequestException
-
-            from smolagents.utils import truncate_content
-        except ImportError as e:
-            raise ImportError(
-                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
-            ) from e
-        try:
-            # Add user agent to avoid some blocking
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-            }
-
-            # Send a GET request to the URL with a 20-second timeout
-            response = requests.get(url, timeout=20, headers=headers)
-            response.raise_for_status()
-
-            # Convert the HTML content to Markdown
-            markdown_content = markdownify(response.text).strip()
-
-            # Remove multiple line breaks
-            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
-
-            return truncate_content(markdown_content, 10000)
-
-        except requests.exceptions.Timeout:
-            return "The request timed out. Please try again later or check the URL."
-        except RequestException as e:
-            return f"Error fetching the webpage: {str(e)}"
-        except Exception as e:
-            return f"An unexpected error occurred: {str(e)}"
-
-    def __init__(self, *args, **kwargs):
-        self.is_initialized = False
+from smolagents import CodeAgent, HfApiModel, tool
+from tools.final_answer import FinalAnswerTool
+from tools.visit_webpage import VisitWebpageTool
+from Gradio_UI import GradioUI
+import requests
+import yaml
+import os
+from typing import Dict, List, Optional
+import re  # Add this import at the top with other imports
+
+@tool
+def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
+    """Fetches recent news articles about any topic using Serper.dev.
+
+    Args:
+        topic: The topic to search for news about
+        num_results: Number of news articles to retrieve (default: 5)
+
+    Returns:
+        List of dictionaries containing article information
+    """
+    try:
+        api_key = os.environ.get("SERPER_API_KEY")
+        if not api_key:
+            return "Error: SERPER_API_KEY not found in environment variables"
+
+        url = "https://google.serper.dev/news"
+        headers = {
+            "X-API-KEY": api_key
+        }
+        params = {
+            "q": topic,
+            "gl": "us",
+            "hl": "en"
+        }
+
+        response = requests.get(url, headers=headers, params=params)
+        response.raise_for_status()
+
+        results = response.json()
+
+        if "news" not in results:
+            return []
+
+        articles = []
+        for article in results["news"][:num_results]:
+            articles.append({
+                'title': article.get('title', 'No title'),
+                'source': article.get('source', 'Unknown source'),
+                'date': article.get('date', 'No date'),
+                'link': article.get('link', 'No link'),
+                'snippet': article.get('snippet', 'No preview available')
+            })
+
+        return articles
+
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+@tool
+def scrape_articles(articles: List[Dict]) -> List[Dict]:
+    """Scrapes the full content of news articles from their URLs.
+
+    Args:
+        articles: List of article dictionaries containing article information
+
+    Returns:
+        List of articles with additional full_content field
+    """
+    webpage_tool = VisitWebpageTool()
+
+    for article in articles:
+        try:
+            # Skip known paywalled sites
+            domain = article['link'].lower()
+            if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
+                article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
+                continue
+
+            full_content = webpage_tool.forward(article['link'])
+            if full_content and len(full_content.strip()) > 0:
+                article['full_content'] = full_content
+            else:
+                article['full_content'] = article['snippet']
+        except Exception as e:
+            article['full_content'] = article['snippet']
+
+    return articles
+
+@tool
+def summarize_news(articles: List[Dict]) -> str:
+    """Creates a summary of the news articles followed by a list of sources.
+
+    Args:
+        articles: List of article dictionaries containing title, source, date, link, snippet, and full_content
+
+    Returns:
+        A string containing a summary followed by article references
+    """
+    if not articles or not isinstance(articles, list):
+        return "No articles to summarize"
+
+    # Collect all content for the overall summary
+    all_content = [article.get('full_content', article['snippet']) for article in articles]
+
+    # Create a high-level summary from content
+    summary = "📰 Summary:\n"
+    summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
+    summary += "Key points: " + ". ".join(all_content[:2]) + "\n\n"
+
+    # List individual articles
+    summary += "🔍 Articles:\n"
+    for idx, article in enumerate(articles, 1):
+        title = article['title']
+        link = article['link']
+        date = article['date']
+        content = article.get('full_content', article['snippet'])
+        snippet = content[:200] + "..." if len(content) > 200 else content
+
+        summary += f"{idx}. **{title}**\n"
+        summary += f"   {snippet}\n"
+        summary += f"   [Read more]({link}) ({date})\n\n"
+
+    return summary
+
+# Load prompt templates
+with open("prompts.yaml", 'r') as stream:
+    prompt_templates = yaml.safe_load(stream)
+
+# Initialize the model
+model = HfApiModel(
+    max_tokens=2096,
+    temperature=0.5,
+    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
+    custom_role_conversions=None,
+)
+
+final_answer = FinalAnswerTool()
+
+# Create the agent with all tools
+agent = CodeAgent(
+    model=model,
+    tools=[fetch_news, scrape_articles, summarize_news, final_answer],  # Added scrape_articles
+    max_steps=6,
+    verbosity_level=1,
+    grammar=None,
+    planning_interval=None,
+    name="News Agent",
+    description="An agent that fetches and summarizes news about any topic",
+    prompt_templates=prompt_templates
+)
+
+# Launch the Gradio interface
+if __name__ == "__main__":
+    GradioUI(agent).launch()
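
For anyone reviewing this commit who wants to exercise the new tools outside the Gradio UI, a minimal smoke test might look like the sketch below. It is not part of the commit: it assumes a valid SERPER_API_KEY is exported, that the Space's files (prompts.yaml, tools/, Gradio_UI.py) sit next to app.py, and that importing app is acceptable (the module-level setup reads prompts.yaml and instantiates the model at import time). The topic string is illustrative; smolagents @tool objects are callable like plain functions.

# Sketch only: calls the new tools directly, bypassing the agent loop.
# Assumes SERPER_API_KEY is set and app.py's module-level setup can run.
import os

from app import fetch_news, scrape_articles, summarize_news

assert os.environ.get("SERPER_API_KEY"), "export SERPER_API_KEY first"

articles = fetch_news(topic="climate policy", num_results=3)  # topic is illustrative
if isinstance(articles, list):  # fetch_news returns an error string on failure
    articles = scrape_articles(articles)
    print(summarize_news(articles))
else:
    print(articles)  # surface the error string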