# Spaces: Sleeping
# app.py
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
import random | |
import time | |
def convert_to_mobile_url(url):
    """Rewrite a Naver blog post URL to its mobile (m.blog.naver.com) form.

    The last two path segments of the URL are taken as the user id and the
    post id. URLs that do not mention ``blog.naver.com``, or that do not have
    enough path segments, are returned unchanged.

    Args:
        url: The URL entered by the user.

    Returns:
        The mobile URL when the input looks like a Naver blog post,
        otherwise the original ``url``.
    """
    if "blog.naver.com" not in url:
        return url

    # Drop surrounding whitespace and trailing slashes first; otherwise the
    # final segment is empty and the post id ends up in the user-id slot.
    url_parts = url.strip().rstrip("/").split("/")
    if len(url_parts) > 4:
        user_id, post_id = url_parts[-2], url_parts[-1]
        return f"https://m.blog.naver.com/{user_id}/{post_id}"
    return url
def scrape_naver_blog(url):
    """Download a Naver blog post and return its title and body as text.

    The URL is first passed through ``convert_to_mobile_url``, then fetched
    with ``requests`` and parsed with BeautifulSoup. The title is read from
    the ``se-title-text`` module and the body from the ``<p>`` elements of
    every ``se-module se-module-text`` div.

    Args:
        url: The blog post URL entered by the user.

    Returns:
        A single string holding the title and the content lines. If the
        request or the parsing raises an ``Exception``, an error-message
        string is returned instead of propagating it.
    """
    # NOTE(review): the user-facing strings below look mis-encoded (probably
    # Korean text decoded with the wrong charset) — confirm against the
    # original file. They are kept exactly as found.
    try:
        # Convert to the mobile URL.
        mobile_url = convert_to_mobile_url(url)

        # Wait a random 5-8 seconds before issuing the request.
        time.sleep(random.uniform(5, 8))

        # HTTP request headers.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Referer": "https://www.naver.com/"
        }

        # Send the request. Without a timeout a stalled server would block
        # this call forever; a timeout raises requests.exceptions.Timeout,
        # which the RequestException handler below reports.
        response = requests.get(mobile_url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse the HTML.
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract the title.
        title_div = soup.find('div', class_='se-module se-module-text se-title-text')
        if title_div:
            title = title_div.get_text(strip=True)
        else:
            title = "์ ๋ชฉ์ ์ฐพ์ ์ ์์ต๋๋ค."

        # Extract the body text, one entry per non-empty paragraph.
        content_divs = soup.find_all('div', class_='se-module se-module-text')
        content = []
        if content_divs:
            for div in content_divs:
                paragraphs = div.find_all('p')
                for p in paragraphs:
                    text = p.get_text(strip=True)
                    if text:
                        content.append(text)
        else:
            content.append("๋ด์ฉ์ ์ฐพ์ ์ ์์ต๋๋ค.")

        return f"์ ๋ชฉ:\n{title}\n\n๋ด์ฉ:\n" + "\n".join(content)
    except requests.exceptions.RequestException as e:
        return f"HTTP ์์ฒญ ์๋ฌ: {e}"
    except Exception as e:
        return f"์คํฌ๋ํ ์๋ฌ: {e}"
# Gradio interface definition
def main():
    """Build the Gradio interface around ``scrape_naver_blog`` and launch it."""
    # NOTE(review): the labels below look mis-encoded (probably Korean text
    # decoded with the wrong charset) — confirm against the original file.
    url_box = gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ URL ์ ๋ ฅ")
    result_box = gr.Textbox(label="์คํฌ๋ํ ๊ฒฐ๊ณผ")

    demo = gr.Interface(
        fn=scrape_naver_blog,
        inputs=url_box,
        outputs=result_box,
        title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํผ",
        description="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ์์ ์ ๋ชฉ๊ณผ ๋ด์ฉ์ ์คํฌ๋ํํฉ๋๋ค. URL์ ์ ๋ ฅํ์ธ์.",
    )
    demo.launch()
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()