import asyncio
import json
from typing import Dict, List, Optional, Union

import aiohttp
from bs4 import BeautifulSoup

BASE_URL = "https://hacker-news.firebaseio.com/v0"


async def fetch_item(session: aiohttp.ClientSession, item_id: int) -> dict:
    """
    Asynchronously fetches an item (story, comment, etc.) by its ID.

    Args:
        session: aiohttp ClientSession for making HTTP requests.
        item_id (int): The ID of the item to fetch.

    Returns:
        dict: Details of the item.
    """
    url = f"{BASE_URL}/item/{item_id}.json"
    async with session.get(url) as response:
        return await response.json()


async def fetch_story_ids(story_type: str = "top", limit: Optional[int] = None) -> List[int]:
    """
    Asynchronously fetches story IDs for a given story type.

    Args:
        story_type: The story type ("top", "new", "best", "ask", "show", or
            "job"), mapped to the matching endpoint (e.g. `topstories.json`).
        limit: The maximum number of story IDs to return. Defaults to all.

    Returns:
        List[int]: A list of story IDs.
    """
    url = f"{BASE_URL}/{story_type}stories.json"
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        async with session.get(url) as response:
            story_ids = await response.json()

    if limit:
        story_ids = story_ids[:limit]

    return story_ids


async def fetch_text(session: aiohttp.ClientSession, url: str) -> str:
    """
    Fetches the visible text from a URL (if there is text to be fetched). On
    failure, it returns an informative message instead of raising.

    Args:
        session: `aiohttp` session
        url: The story URL

    Returns:
        The story text, or an informative error message (also a string).
    """
    try:
        async with session.get(url) as response:
            if response.status == 200:
                html_content = await response.text()
                soup = BeautifulSoup(html_content, 'html.parser')
                return soup.get_text()
            else:
                return f"Unable to fetch content from {url}. Status code: {response.status}"
    except Exception as e:
        return f"An error occurred: {e}"


async def get_hn_stories(
    limit: int = 5,
    keywords: Optional[List[str]] = None,
    story_type: str = "top",
) -> List[Dict[str, Union[str, int]]]:
    """
    Asynchronously fetches Hacker News stories based on the provided parameters.

    Args:
        limit (int): The number of stories to retrieve. Default is 5.
        keywords (List[str]): An optional list of keywords; only stories whose
            titles match at least one keyword are returned.
        story_type (str): The story type (see `fetch_story_ids`).

    Returns:
        List[Dict[str, Union[str, int]]]: A list of dictionaries containing
        'story_id', 'title', 'url', and 'score' of the stories.
    """
    # Without keywords the limit can be applied up front; with keywords we
    # fetch the full ID list so the filter has enough candidates to draw from.
    if keywords is None:
        story_ids = await fetch_story_ids(story_type, limit)
    else:
        story_ids = await fetch_story_ids(story_type)

    # Reuse a single session for all item fetches instead of opening a new
    # connection pool per story.
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        stories = await asyncio.gather(*[fetch_item(session, story_id) for story_id in story_ids])

    filtered_stories = []
    for story in stories:
        if not story:
            continue

        title = story.get("title", "")
        story_info = {
            "title": title,
            "url": story.get("url"),
            "score": story.get("score"),
            "story_id": story.get("id"),
        }

        if keywords is None or any(keyword.lower() in title.lower() for keyword in keywords):
            filtered_stories.append(story_info)

    return filtered_stories[:limit]


async def get_relevant_comments(story_id: int, limit: int = 10):
    """
    Gets the most relevant comments for a Hacker News item.

    Args:
        story_id: The ID of the Hacker News item.
        limit: The number of comments to retrieve (default is 10).

    Returns:
        A JSON string containing the comment texts, or an informative message
        if the item has no comments.
    """
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        story = await fetch_item(session, story_id)

        if 'kids' not in story:
            return "This item doesn't have comments."

        # The HN API returns 'kids' in ranked display order and comment items
        # carry no 'score' field, so the first entries are already the most
        # relevant comments; no re-sorting is needed.
        comment_ids = story['kids']

        comment_details = await asyncio.gather(*[fetch_item(session, cid) for cid in comment_ids])

        # Skip deleted or dead comments, which have no 'text' field.
        relevant_comments = [comment["text"] for comment in comment_details
                             if comment and comment.get("text")][:limit]

        return json.dumps(relevant_comments)


async def get_story_content(story_url: str):
    """
    Gets the text content of a story page using BeautifulSoup.

    Args:
        story_url: A string representing the story URL

    Returns:
        The text content of the story, or an error message (see `fetch_text`).
    """
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        return await fetch_text(session, story_url)
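

# A minimal usage sketch, not part of the original module: it assumes the
# functions above behave as documented and that no event loop is already
# running. `asyncio.run` drives the demo coroutine to completion.
if __name__ == "__main__":
    async def _demo():
        # Fetch the top three stories and print their scores and titles.
        stories = await get_hn_stories(limit=3)
        for story in stories:
            print(f"{story['score']:>5}  {story['title']}")

        if stories:
            # Pull ranked comments and the article text for the first story.
            comments = await get_relevant_comments(stories[0]["story_id"], limit=3)
            print(comments)
            if stories[0]["url"]:
                content = await get_story_content(stories[0]["url"])
                print(content[:500])

    asyncio.run(_demo())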