# NOTE(review): the six lines that preceded this file's imports ("Spaces:",
# "Sleeping", file size, commit hash, and a run of line numbers) were residue
# from scraping the hosting page's UI, not part of the program; they have been
# neutralized into this comment so the module parses.
from bs4 import BeautifulSoup
import asyncio
import aiohttp
from typing import List, Dict, Union
import json
BASE_URL = "https://hacker-news.firebaseio.com/v0"
async def fetch_item(session: aiohttp.ClientSession, item_id: int):
    """
    Asynchronously fetch a single Hacker News item by its ID.

    Args:
        session: Aiohttp ClientSession for making HTTP requests.
        item_id (int): The ID of the item to fetch.

    Returns:
        dict: The decoded JSON payload for the item (the HN API returns
        null — i.e. None — for unknown/dead IDs).
    """
    endpoint = f"{BASE_URL}/item/{item_id}.json"
    async with session.get(endpoint) as resp:
        payload = await resp.json()
    return payload
async def fetch_story_ids(story_type: str = "top", limit: Union[int, None] = None):
    """
    Asynchronously fetch the IDs of stories of the given type.

    Args:
        story_type: The story type; maps onto the HN endpoint name,
            e.g. "top" -> `topstories.json`, "new" -> `newstories.json`.
        limit: Maximum number of IDs to return; None means all.

    Returns:
        List[int]: A list of story IDs.
    """
    url = f"{BASE_URL}/{story_type}stories.json"
    # `ssl=False` is the supported spelling; `verify_ssl` is deprecated in
    # aiohttp 3.x. Certificate verification is intentionally disabled here
    # (pre-existing behavior) — NOTE(review): consider re-enabling it.
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        async with session.get(url) as response:
            story_ids = await response.json()
    # `is not None` (rather than truthiness) so that limit=0 correctly
    # yields an empty list instead of the full list.
    if limit is not None:
        story_ids = story_ids[:limit]
    return story_ids
async def fetch_text(session, url):
    """
    Retrieve the readable text of a web page, best-effort.

    This function never raises: any failure (non-200 status, network error,
    parse error) is reported back as an informative string so the caller —
    an LLM tool — always receives text it can surface.

    Args:
        session: `aiohttp` session
        url: The story URL

    Returns:
        The page's text content, or an error description string.
    """
    try:
        async with session.get(url) as response:
            # Guard clause: anything other than 200 becomes a message.
            if response.status != 200:
                return f"Unable to fetch content from {url}. Status code: {response.status}"
            html_content = await response.text()
            return BeautifulSoup(html_content, 'html.parser').get_text()
    except Exception as e:
        # Deliberate catch-all: the contract is "string out, no exceptions".
        return f"An error occurred: {e}"
async def get_hn_stories(limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
    """
    Asynchronously fetch Hacker News stories based on the provided parameters.

    Args:
        limit (int): The number of stories to retrieve. Default is 5.
        keywords (List[str]): Optional keywords; when given, only stories
            whose title contains at least one keyword (case-insensitive)
            are returned.
        story_type (str): The story type ("top", "new", "best", ...).

    Returns:
        List[Dict[str, Union[str, int]]]: A list of dictionaries containing
        'story_id', 'title', 'url', and 'score' of the stories.
    """
    # Only pre-truncate the ID list when there is no keyword filter;
    # with a filter we need the full list so enough matches can be found.
    if keywords is None:
        story_ids = await fetch_story_ids(story_type, limit)
    else:
        story_ids = await fetch_story_ids(story_type)

    # One shared session for every item fetch (the previous version opened
    # a fresh session per story). `ssl=False` replaces deprecated
    # `verify_ssl=False`; verification remains off (pre-existing behavior).
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        stories = await asyncio.gather(
            *(fetch_item(session, story_id) for story_id in story_ids)
        )

    filtered_stories = []
    for story in stories:
        if not story:
            # The HN API returns null for dead/deleted IDs.
            continue
        title = story.get("title") or ""
        # Guarded title access: job/dead items may lack a title, which
        # previously raised on story['title'].lower().
        if keywords is not None and not any(kw.lower() in title.lower() for kw in keywords):
            continue
        filtered_stories.append({
            "title": story.get("title"),
            "url": story.get("url"),
            "score": story.get("score"),
            "story_id": story.get("id"),
        })
    return filtered_stories[:limit]
async def get_relevant_comments(story_id: int, limit: int = 10):
    """
    Get the most relevant comments for a Hacker News item.

    Args:
        story_id: The ID of the Hacker News item.
        limit: The number of comments to retrieve (default is 10).

    Returns:
        A JSON-encoded list of comment text strings, or an informative
        message when the item has no comments.
    """
    # `ssl=False` replaces deprecated `verify_ssl=False`; verification
    # remains off (pre-existing behavior).
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        story = await fetch_item(session, story_id)
        # The API returns None for unknown/dead IDs; the old `'kids' not in
        # story` check would raise TypeError in that case.
        if not story or 'kids' not in story:
            return "This item doesn't have comments."
        comment_ids = story['kids']
        comment_details = await asyncio.gather(*[fetch_item(session, cid) for cid in comment_ids])
        # Drop deleted/dead comments: they come back as None or without a
        # "text" field, which previously raised KeyError below.
        comment_details = [c for c in comment_details if c and c.get("text")]
        # NOTE(review): HN comment objects carry no 'score' field, so this
        # sort is effectively stable/no-op and preserves the API's ranked
        # order; the key is kept defensively.
        comment_details.sort(key=lambda comment: comment.get('score', 0), reverse=True)
        relevant_comments = [comment["text"] for comment in comment_details[:limit]]
        return json.dumps(relevant_comments)
async def get_story_content(story_url: str):
    """
    Download a story's web page and return its readable text.

    Args:
        story_url: A string representing the story URL

    Returns:
        The content of the story (or an error-description string from
        `fetch_text` on failure).
    """
    connector = aiohttp.TCPConnector(verify_ssl=False)
    async with aiohttp.ClientSession(connector=connector) as http_session:
        return await fetch_text(http_session, story_url)