import asyncio
import json
from typing import Dict, List, Optional, Union

import aiohttp
from bs4 import BeautifulSoup

BASE_URL = "https://hacker-news.firebaseio.com/v0"


async def fetch_item(session: aiohttp.ClientSession, item_id: int) -> dict:
    """
    Asynchronously fetches an item (story, comment, etc.) by its ID.

    Args:
        session: aiohttp ClientSession for making HTTP requests.
        item_id (int): The ID of the item to fetch.

    Returns:
        dict: Details of the item.
    """
    url = f"{BASE_URL}/item/{item_id}.json"
    async with session.get(url) as response:
        return await response.json()


async def fetch_story_ids(story_type: str = "top", limit: Optional[int] = None) -> List[int]:
    """
    Asynchronously fetches story IDs for a given story type.

    Args:
        story_type: The story type ("top", "new", "best", "ask", "show", or
            "job"), mapped to the matching endpoint (e.g. `topstories.json`).
        limit: The maximum number of story IDs to return. Defaults to all.

    Returns:
        List[int]: A list of story IDs.
    """
    url = f"{BASE_URL}/{story_type}stories.json"
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        async with session.get(url) as response:
            story_ids = await response.json()

    if limit:
        story_ids = story_ids[:limit]

    return story_ids


async def fetch_text(session: aiohttp.ClientSession, url: str) -> str:
    """
    Fetches the visible text from a URL (if there is text to be fetched). On
    failure, it returns an informative message instead of raising.

    Args:
        session: `aiohttp` session
        url: The story URL

    Returns:
        The story text, or an informative error message (also a string).
    """
    try:
        async with session.get(url) as response:
            if response.status == 200:
                html_content = await response.text()
                soup = BeautifulSoup(html_content, 'html.parser')
                return soup.get_text()
            else:
                return f"Unable to fetch content from {url}. Status code: {response.status}"
    except Exception as e:
        return f"An error occurred: {e}"


async def get_hn_stories(
    limit: int = 5,
    keywords: Optional[List[str]] = None,
    story_type: str = "top",
) -> List[Dict[str, Union[str, int]]]:
    """
    Asynchronously fetches Hacker News stories based on the provided parameters.

    Args:
        limit (int): The number of stories to retrieve. Default is 5.
        keywords (List[str]): An optional list of keywords; only stories whose
            titles match at least one keyword are returned.
        story_type (str): The story type (see `fetch_story_ids`).

    Returns:
        List[Dict[str, Union[str, int]]]: A list of dictionaries containing
        'story_id', 'title', 'url', and 'score' of the stories.
    """
    # Without keywords the limit can be applied up front; with keywords we
    # fetch the full ID list so the filter has enough candidates to draw from.
    if keywords is None:
        story_ids = await fetch_story_ids(story_type, limit)
    else:
        story_ids = await fetch_story_ids(story_type)

    # Reuse a single session for all item fetches instead of opening a new
    # connection pool per story.
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        stories = await asyncio.gather(*[fetch_item(session, story_id) for story_id in story_ids])

    filtered_stories = []
    for story in stories:
        if not story:
            continue

        title = story.get("title", "")
        story_info = {
            "title": title,
            "url": story.get("url"),
            "score": story.get("score"),
            "story_id": story.get("id"),
        }

        if keywords is None or any(keyword.lower() in title.lower() for keyword in keywords):
            filtered_stories.append(story_info)

    return filtered_stories[:limit]


async def get_relevant_comments(story_id: int, limit: int = 10):
    """
    Gets the most relevant comments for a Hacker News item.

    Args:
        story_id: The ID of the Hacker News item.
        limit: The number of comments to retrieve (default is 10).

    Returns:
        A JSON string containing the comment texts, or an informative message
        if the item has no comments.
    """
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        story = await fetch_item(session, story_id)

        if 'kids' not in story:
            return "This item doesn't have comments."

        # The HN API returns 'kids' in ranked display order and comment items
        # carry no 'score' field, so the first entries are already the most
        # relevant comments; no re-sorting is needed.
        comment_ids = story['kids']

        comment_details = await asyncio.gather(*[fetch_item(session, cid) for cid in comment_ids])

        # Skip deleted or dead comments, which have no 'text' field.
        relevant_comments = [comment["text"] for comment in comment_details
                             if comment and comment.get("text")][:limit]

        return json.dumps(relevant_comments)


async def get_story_content(story_url: str):
    """
    Gets the text content of a story page using BeautifulSoup.

    Args:
        story_url: A string representing the story URL

    Returns:
        The text content of the story, or an error message (see `fetch_text`).
    """
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        return await fetch_text(session, story_url)
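

# A minimal usage sketch, not part of the original module: it assumes the
# functions above behave as documented and that no event loop is already
# running. `asyncio.run` drives the demo coroutine to completion.
if __name__ == "__main__":
    async def _demo():
        # Fetch the top three stories and print their scores and titles.
        stories = await get_hn_stories(limit=3)
        for story in stories:
            print(f"{story['score']:>5}  {story['title']}")

        if stories:
            # Pull ranked comments and the article text for the first story.
            comments = await get_relevant_comments(stories[0]["story_id"], limit=3)
            print(comments)
            if stories[0]["url"]:
                content = await get_story_content(stories[0]["url"])
                print(content[:500])

    asyncio.run(_demo())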