fdaudens committed (verified) · Commit bf90cd3 · Parent(s): 81db657

Update app.py

Files changed (1):
  1. app.py +146 -42
app.py CHANGED
@@ -1,51 +1,155 @@
-from typing import Any, Optional
-from smolagents.tools import Tool
-import requests
-import markdownify
-import smolagents
-import re  # Add re import here
-
-class VisitWebpageTool(Tool):
-    name = "visit_webpage"
-    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
-    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
-    output_type = "string"
-
-    def forward(self, url: str) -> str:
-        try:
-            import requests
-            from markdownify import markdownify
-            from requests.exceptions import RequestException
-
-            from smolagents.utils import truncate_content
-        except ImportError as e:
-            raise ImportError(
-                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
-            ) from e
-        try:
-            # Add user agent to avoid some blocking
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-            }
-
-            # Send a GET request to the URL with a 20-second timeout
-            response = requests.get(url, timeout=20, headers=headers)
-            response.raise_for_status()
-
-            # Convert the HTML content to Markdown
-            markdown_content = markdownify(response.text).strip()
-
-            # Remove multiple line breaks
-            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
-
-            return truncate_content(markdown_content, 10000)
-
-        except requests.exceptions.Timeout:
-            return "The request timed out. Please try again later or check the URL."
-        except RequestException as e:
-            return f"Error fetching the webpage: {str(e)}"
-        except Exception as e:
-            return f"An unexpected error occurred: {str(e)}"
-
-    def __init__(self, *args, **kwargs):
-        self.is_initialized = False
+from smolagents import CodeAgent, HfApiModel, tool
+from tools.final_answer import FinalAnswerTool
+from tools.visit_webpage import VisitWebpageTool
+from Gradio_UI import GradioUI
+import requests
+import yaml
+import os
+from typing import Dict, List, Optional
+import re  # Add this import at the top with other imports
+
+@tool
+def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
+    """Fetches recent news articles about any topic using Serper.dev.
+
+    Args:
+        topic: The topic to search for news about
+        num_results: Number of news articles to retrieve (default: 5)
+
+    Returns:
+        List of dictionaries containing article information
+    """
+    try:
+        api_key = os.environ.get("SERPER_API_KEY")
+        if not api_key:
+            return "Error: SERPER_API_KEY not found in environment variables"
+
+        url = "https://google.serper.dev/news"
+        headers = {
+            "X-API-KEY": api_key
+        }
+        params = {
+            "q": topic,
+            "gl": "us",
+            "hl": "en"
+        }
+
+        response = requests.get(url, headers=headers, params=params)
+        response.raise_for_status()
+
+        results = response.json()
+
+        if "news" not in results:
+            return []
+
+        articles = []
+        for article in results["news"][:num_results]:
+            articles.append({
+                'title': article.get('title', 'No title'),
+                'source': article.get('source', 'Unknown source'),
+                'date': article.get('date', 'No date'),
+                'link': article.get('link', 'No link'),
+                'snippet': article.get('snippet', 'No preview available')
+            })
+
+        return articles
+
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+@tool
+def scrape_articles(articles: List[Dict]) -> List[Dict]:
+    """Scrapes the full content of news articles from their URLs.
+
+    Args:
+        articles: List of article dictionaries containing article information
+
+    Returns:
+        List of articles with additional full_content field
+    """
+    webpage_tool = VisitWebpageTool()
+
+    for article in articles:
+        try:
+            # Skip known paywalled sites
+            domain = article['link'].lower()
+            if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
+                article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
+                continue
+
+            full_content = webpage_tool.forward(article['link'])
+            if full_content and len(full_content.strip()) > 0:
+                article['full_content'] = full_content
+            else:
+                article['full_content'] = article['snippet']
+        except Exception as e:
+            article['full_content'] = article['snippet']
+
+    return articles
+
+@tool
+def summarize_news(articles: List[Dict]) -> str:
+    """Creates a summary of the news articles followed by a list of sources.
+
+    Args:
+        articles: List of article dictionaries containing title, source, date, link, snippet, and full_content
+
+    Returns:
+        A string containing a summary followed by article references
+    """
+    if not articles or not isinstance(articles, list):
+        return "No articles to summarize"
+
+    # Collect all content for the overall summary
+    all_content = [article.get('full_content', article['snippet']) for article in articles]
+
+    # Create a high-level summary from content
+    summary = "📰 Summary:\n"
+    summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
+    summary += "Key points: " + ". ".join(all_content[:2]) + "\n\n"
+
+    # List individual articles
+    summary += "🔍 Articles:\n"
+    for idx, article in enumerate(articles, 1):
+        title = article['title']
+        link = article['link']
+        date = article['date']
+        content = article.get('full_content', article['snippet'])
+        snippet = content[:200] + "..." if len(content) > 200 else content
+
+        summary += f"{idx}. **{title}**\n"
+        summary += f"   {snippet}\n"
+        summary += f"   [Read more]({link}) ({date})\n\n"
+
+    return summary
+
+# Load prompt templates
+with open("prompts.yaml", 'r') as stream:
+    prompt_templates = yaml.safe_load(stream)
+
+# Initialize the model
+model = HfApiModel(
+    max_tokens=2096,
+    temperature=0.5,
+    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
+    custom_role_conversions=None,
+)
+
+final_answer = FinalAnswerTool()
+
+# Create the agent with all tools
+agent = CodeAgent(
+    model=model,
+    tools=[fetch_news, scrape_articles, summarize_news, final_answer],  # Added scrape_articles
+    max_steps=6,
+    verbosity_level=1,
+    grammar=None,
+    planning_interval=None,
+    name="News Agent",
+    description="An agent that fetches and summarizes news about any topic",
+    prompt_templates=prompt_templates
+)
+
+# Launch the Gradio interface
+if __name__ == "__main__":
+    GradioUI(agent).launch()
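
For anyone reviewing this commit who wants to exercise the new tools outside the Gradio UI, a minimal smoke test might look like the sketch below. It is not part of the commit: it assumes a valid SERPER_API_KEY is exported, that the Space's files (prompts.yaml, tools/, Gradio_UI.py) sit next to app.py, and that importing app is acceptable (the module-level setup reads prompts.yaml and instantiates the model at import time). The topic string is illustrative; smolagents @tool objects are callable like plain functions.

# Sketch only: calls the new tools directly, bypassing the agent loop.
# Assumes SERPER_API_KEY is set and app.py's module-level setup can run.
import os

from app import fetch_news, scrape_articles, summarize_news

assert os.environ.get("SERPER_API_KEY"), "export SERPER_API_KEY first"

articles = fetch_news(topic="climate policy", num_results=3)  # topic is illustrative
if isinstance(articles, list):  # fetch_news returns an error string on failure
    articles = scrape_articles(articles)
    print(summarize_news(articles))
else:
    print(articles)  # surface the error string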