Spaces:
Sleeping
Sleeping
File size: 2,456 Bytes
cd4f45f b79a379 83f2766 cd4f45f 5448c39 87047c3 276a412 87047c3 276a412 5448c39 2e55746 5448c39 cd4f45f 5448c39 83f2766 cd4f45f 8c2ca26 cd4f45f 8c2ca26 cd4f45f 8c2ca26 83f2766 cd4f45f b79a379 cd4f45f b79a379 83f2766 cd4f45f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# app.py
import gradio as gr
import requests
from bs4 import BeautifulSoup
import random
import time
def convert_to_mobile_url(url):
    """Rewrite a Naver blog post URL to its ``m.blog.naver.com`` form.

    The last two ``/``-separated segments of the URL are taken as the
    user id and the post id. Surrounding whitespace and trailing slashes
    are ignored when locating those segments, so a pasted URL such as
    ``https://blog.naver.com/user/123/`` still resolves to the post.

    Args:
        url: The URL to convert.

    Returns:
        ``https://m.blog.naver.com/{user_id}/{post_id}`` when ``url``
        contains ``blog.naver.com`` and has at least a user and a post
        segment; otherwise ``url`` unchanged. URLs that are already on
        ``m.blog.naver.com`` also match and are rebuilt in the same form.
    """
    if "blog.naver.com" not in url:
        return url

    # A trailing "/" would otherwise yield an empty last segment and shift
    # the user id into the post id position.
    url_parts = url.strip().rstrip("/").split("/")
    if len(url_parts) > 4:
        user_id, post_id = url_parts[-2], url_parts[-1]
        return f"https://m.blog.naver.com/{user_id}/{post_id}"
    return url
def scrape_naver_blog(url):
    """Fetch a Naver blog post and return its title and body text.

    The URL is first passed through ``convert_to_mobile_url``, the page is
    downloaded with ``requests`` and parsed with BeautifulSoup. The title is
    read from the ``se-title-text`` module and the body from every ``<p>``
    inside the ``se-module se-module-text`` modules.

    Args:
        url: URL of the blog post, as typed by the user.

    Returns:
        A single string with the title and the body paragraphs (one per
        line). Failures are never raised to the caller: a network/HTTP
        problem or any other error is reported as a Korean error message
        string instead, so the result can always be shown in the UI.
    """
    try:
        # Convert to the mobile URL.
        mobile_url = convert_to_mobile_url(url)

        # Wait a random 5-8 seconds before each request
        # (presumably to avoid hammering the site -- confirm before changing).
        time.sleep(random.uniform(5, 8))

        # HTTP request headers.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Referer": "https://www.naver.com/"
        }

        # Send the request. Without a timeout a stalled connection would
        # block forever; a timeout raises a RequestException subclass that
        # is handled below.
        response = requests.get(mobile_url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse the HTML.
        soup = BeautifulSoup(response.text, 'html.parser')

        # Scrape the title.
        title_div = soup.find('div', class_='se-module se-module-text se-title-text')
        if title_div:
            title = title_div.get_text(strip=True)
        else:
            title = "제목을 찾을 수 없습니다."

        # Scrape the body: every non-empty paragraph of every text module.
        content_divs = soup.find_all('div', class_='se-module se-module-text')
        paragraph_texts = (
            p.get_text(strip=True)
            for div in content_divs
            for p in div.find_all('p')
        )
        content = [text for text in paragraph_texts if text]

        # Covers both "no text modules at all" and "modules without text".
        if not content:
            content.append("내용을 찾을 수 없습니다.")

        return f"제목:\n{title}\n\n내용:\n" + "\n".join(content)
    except requests.exceptions.RequestException as e:
        return f"HTTP 요청 에러: {e}"
    except Exception as e:
        # Last-resort boundary: report the problem as text to the UI.
        return f"스크래핑 에러: {e}"
# Gradio ์ธํฐํ์ด์ค ์ ์
def main():
gr.Interface(
fn=scrape_naver_blog,
inputs=gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ URL ์
๋ ฅ"),
outputs=gr.Textbox(label="์คํฌ๋ํ ๊ฒฐ๊ณผ"),
title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํผ",
description="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ์์ ์ ๋ชฉ๊ณผ ๋ด์ฉ์ ์คํฌ๋ํํฉ๋๋ค. URL์ ์
๋ ฅํ์ธ์."
).launch()
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|