Update app.py
app.py
CHANGED
@@ -1,51 +1,155 @@
from smolagents import CodeAgent, HfApiModel, tool
from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from Gradio_UI import GradioUI
import requests
import yaml
import os
from typing import Dict, List

@tool
def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
    """Fetches recent news articles about any topic using Serper.dev.

    Args:
        topic: The topic to search for news about
        num_results: Number of news articles to retrieve (default: 5)

    Returns:
        List of dictionaries containing article information, or an error string on failure
    """
    try:
        api_key = os.environ.get("SERPER_API_KEY")
        if not api_key:
            return "Error: SERPER_API_KEY not found in environment variables"

        url = "https://google.serper.dev/news"
        headers = {
            "X-API-KEY": api_key
        }
        params = {
            "q": topic,
            "gl": "us",
            "hl": "en"
        }

        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()

        results = response.json()

        if "news" not in results:
            return []

        articles = []
        for article in results["news"][:num_results]:
            articles.append({
                'title': article.get('title', 'No title'),
                'source': article.get('source', 'Unknown source'),
                'date': article.get('date', 'No date'),
                'link': article.get('link', 'No link'),
                'snippet': article.get('snippet', 'No preview available')
            })

        return articles

    except Exception as e:
        return f"Error: {str(e)}"

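A quick way to sanity-check this tool outside the agent loop is to call it directly; a minimal sketch, assuming a valid Serper.dev key is set and that the smolagents @tool wrapper keeps the function callable (the topic string is illustrative):

import os
os.environ.setdefault("SERPER_API_KEY", "<your-serper-key>")  # placeholder, not a real key

articles = fetch_news(topic="artificial intelligence", num_results=3)
if isinstance(articles, str):
    print(articles)  # fetch_news returns an error string on failure
else:
    for a in articles:
        print(a["title"], "-", a["link"])
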
@tool
def scrape_articles(articles: List[Dict]) -> List[Dict]:
    """Scrapes the full content of news articles from their URLs.

    Args:
        articles: List of article dictionaries containing article information

    Returns:
        List of articles with an additional full_content field
    """
    webpage_tool = VisitWebpageTool()

    for article in articles:
        try:
            # Skip known paywalled sites
            domain = article['link'].lower()
            if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
                article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
                continue

            full_content = webpage_tool.forward(article['link'])
            if full_content and len(full_content.strip()) > 0:
                article['full_content'] = full_content
            else:
                article['full_content'] = article['snippet']
        except Exception:
            # Fall back to the search snippet if the page cannot be fetched
            article['full_content'] = article['snippet']

    return articles

@tool
def summarize_news(articles: List[Dict]) -> str:
    """Creates a summary of the news articles followed by a list of sources.

    Args:
        articles: List of article dictionaries containing title, source, date, link, snippet, and full_content

    Returns:
        A string containing a summary followed by article references
    """
    if not articles or not isinstance(articles, list):
        return "No articles to summarize"

    # Collect all content for the overall summary
    all_content = [article.get('full_content', article['snippet']) for article in articles]

    # Create a high-level summary from content
    summary = "📰 Summary:\n"
    summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
    summary += "Key points: " + ". ".join(all_content[:2]) + "\n\n"

    # List individual articles
    summary += "🔍 Articles:\n"
    for idx, article in enumerate(articles, 1):
        title = article['title']
        link = article['link']
        date = article['date']
        content = article.get('full_content', article['snippet'])
        snippet = content[:200] + "..." if len(content) > 200 else content

        summary += f"{idx}. **{title}**\n"
        summary += f"   {snippet}\n"
        summary += f"   [Read more]({link}) ({date})\n\n"

    return summary

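Called directly, the three tools compose into a fetch → scrape → summarize pipeline; a minimal sketch under the same assumptions as above (topic string is illustrative):

results = fetch_news(topic="climate policy", num_results=3)
if not isinstance(results, str):  # fetch_news returns a string on error
    enriched = scrape_articles(articles=results)
    print(summarize_news(articles=enriched))
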
# Load prompt templates
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

# Initialize the model
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

final_answer = FinalAnswerTool()

# Create the agent with all tools
agent = CodeAgent(
    model=model,
    tools=[fetch_news, scrape_articles, summarize_news, final_answer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name="News Agent",
    description="An agent that fetches and summarizes news about any topic",
    prompt_templates=prompt_templates
)

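Before wiring up the UI, the agent can be smoke-tested headless through smolagents' run(); the prompt below is illustrative:

answer = agent.run("Fetch and summarize the latest news about open-source AI")
print(answer)
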
# Launch the Gradio interface
if __name__ == "__main__":
    GradioUI(agent).launch()