# Page-header residue captured alongside the file (not program code):
#   kijeoung's picture
#   Update app.py
#   2e55746 verified
# app.py
import random
import time
from urllib.parse import parse_qs, urlsplit, urlunsplit

import gradio as gr
import requests
from bs4 import BeautifulSoup
def convert_to_mobile_url(url):
    """Return the mobile (``m.blog.naver.com``) form of a Naver blog post URL.

    The URL is parsed rather than substring-matched, so only URLs whose
    *host* is ``blog.naver.com`` (or a subdomain such as ``m.blog.naver.com``)
    are rewritten. Two layouts are recognised:

    * path style:  ``https://blog.naver.com/<user_id>/<post_id>``
    * query style: ``https://blog.naver.com/PostView.naver?blogId=<user_id>&logNo=<post_id>``

    Empty path segments (e.g. from a trailing slash) are ignored, and input
    without a scheme (``blog.naver.com/user/123``) is accepted.

    Args:
        url: URL string as entered by the user.

    Returns:
        ``https://m.blog.naver.com/<user_id>/<post_id>`` when both ids can be
        extracted (for path-style URLs the original query string and fragment
        are kept); otherwise ``url`` unchanged.
    """
    candidate = url.strip()
    # Without a scheme urlsplit() would treat the host as part of the path.
    if "://" not in candidate:
        candidate = "https://" + candidate

    try:
        parts = urlsplit(candidate)
        host = (parts.hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets): leave untouched.
        return url

    if host != "blog.naver.com" and not host.endswith(".blog.naver.com"):
        return url

    # Dropping empty segments makes ".../user/123/" behave like ".../user/123".
    segments = [segment for segment in parts.path.split("/") if segment]
    if len(segments) >= 2:
        user_id, post_id = segments[-2:]
        return urlunsplit(
            (
                "https",
                "m.blog.naver.com",
                f"/{user_id}/{post_id}",
                parts.query,
                parts.fragment,
            )
        )

    # Query-style links carry the ids in blogId / logNo instead of the path.
    query = parse_qs(parts.query)
    if "blogId" in query and "logNo" in query:
        user_id, post_id = query["blogId"][0], query["logNo"][0]
        return f"https://m.blog.naver.com/{user_id}/{post_id}"

    return url
def scrape_naver_blog(url):
    """Fetch a Naver blog post and return its title and body text as one string.

    The URL is passed through ``convert_to_mobile_url``, the function sleeps
    for a random 5-8 seconds, then downloads the page with browser-like
    headers and parses it with BeautifulSoup's ``html.parser``.

    Args:
        url: Blog post URL as entered by the user.

    Returns:
        A string containing the title followed by the non-empty paragraph
        texts, one per line. If the title or the content cannot be found a
        placeholder message is used in its place. Any exception raised while
        fetching or parsing is caught and reported as an error-message string
        instead of being propagated.
    """
    try:
        # Convert to the mobile URL
        mobile_url = convert_to_mobile_url(url)

        # Add a random delay before each request
        time.sleep(random.uniform(5, 8))

        # HTTP request headers
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Referer": "https://www.naver.com/"
        }

        # Send the request. Without a timeout a stalled connection would block
        # this call (and the UI request behind it) indefinitely; a timeout
        # surfaces as a RequestException and is reported below.
        response = requests.get(mobile_url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse the HTML
        soup = BeautifulSoup(response.text, 'html.parser')

        # Scrape the title
        title_div = soup.find('div', class_='se-module se-module-text se-title-text')
        if title_div:
            title = title_div.get_text(strip=True)
        else:
            title = "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

        # Scrape the content
        # NOTE(review): class_ is a multi-class string here; per the bs4 docs
        # this is compared against the full class attribute value, which would
        # keep the title module out of this list - confirm before changing.
        content_divs = soup.find_all('div', class_='se-module se-module-text')
        paragraph_texts = (
            p.get_text(strip=True)
            for div in content_divs
            for p in div.find_all('p')
        )
        content = [text for text in paragraph_texts if text]

        # Use the placeholder both when no text modules exist and when they
        # exist but contain no non-empty paragraphs.
        if not content:
            content.append("๋‚ด์šฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")

        return f"์ œ๋ชฉ:\n{title}\n\n๋‚ด์šฉ:\n" + "\n".join(content)
    except requests.exceptions.RequestException as e:
        return f"HTTP ์š”์ฒญ ์—๋Ÿฌ: {e}"
    except Exception as e:
        return f"์Šคํฌ๋ž˜ํ•‘ ์—๋Ÿฌ: {e}"
# Define the Gradio interface
def main():
    """Build the Gradio interface around ``scrape_naver_blog`` and launch it."""
    url_input = gr.Textbox(label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL ์ž…๋ ฅ")
    result_output = gr.Textbox(label="์Šคํฌ๋ž˜ํ•‘ ๊ฒฐ๊ณผ")

    # One text box in, one text box out; the scraper's return string is shown as-is.
    demo = gr.Interface(
        fn=scrape_naver_blog,
        inputs=url_input,
        outputs=result_output,
        title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํผ",
        description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ์—์„œ ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค. URL์„ ์ž…๋ ฅํ•˜์„ธ์š”.",
    )
    demo.launch()
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()