kijeoung committed
Commit cd4f45f · verified · 1 Parent(s): 83f2766

Update app.py

Files changed (1)
app.py +36 -38
app.py CHANGED
@@ -1,49 +1,47 @@
+# app.py
 import gradio as gr
-from bs4 import BeautifulSoup
 import requests
+from bs4 import BeautifulSoup
 
 def scrape_naver_blog(url):
     try:
-        # Request the webpage
-        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
+        # Send the HTTP request
+        response = requests.get(url)
         response.raise_for_status()
-
-        # Parse the page with BeautifulSoup
-        soup = BeautifulSoup(response.content, 'html.parser')
-
-        # Extract the title
-        title_element = soup.select_one(
-            "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div > div:nth-of-type(1)"
-        )
-        title = title_element.get_text(strip=True) if title_element else "Title not found"
-
-        # Extract the content
-        content_element = soup.select_one(
-            "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(3) > div:nth-of-type(4)"
-        )
-        content = content_element.get_text(strip=True) if content_element else "Content not found"
-
-        return {"title": title, "content": content}
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        # Scrape the title
+        title_div = soup.find('div', class_='se-module se-module-text se-title-text')
+        if title_div:
+            title = title_div.get_text(strip=True)
+        else:
+            title = "Title not found."
+
+        # Scrape the content
+        content_div = soup.find('div', class_='se-module se-module-text se-quote')
+        if content_div:
+            content = "\n".join(p.get_text(strip=True) for p in content_div.find_all('p'))
+        else:
+            content = "Content not found."
+
+        return f"Title:\n{title}\n\nContent:\n{content}"
 
     except requests.exceptions.RequestException as e:
-        return {"error": f"Request failed: {e}"}
+        return f"HTTP request error: {e}"
     except Exception as e:
-        return {"error": f"An error occurred: {e}"}
-
-# Define Gradio interface
-def display_scraper(url):
-    result = scrape_naver_blog(url)
-    if "error" in result:
-        return result["error"]
-    return f"Title:\n{result['title']}\n\nContent:\n{result['content']}"
-
-iface = gr.Interface(
-    fn=display_scraper,
-    inputs=gr.Textbox(label="Naver blog URL"),
-    outputs=gr.Textbox(label="Scraping result"),
-    title="Naver Blog Scraper",
-    description="Enter a Naver blog URL to extract the blog title and content."
-)
+
+# Define the Gradio interface
+def main():
+    gr.Interface(
+        fn=scrape_naver_blog,
+        inputs=gr.Textbox(label="Enter a Naver blog URL"),
+        outputs=gr.Textbox(label="Scraping result"),
+        title="Naver Blog Scraper",
+        description="Scrapes the title and content from a Naver blog. Enter a URL."
+    ).launch()
 
 if __name__ == "__main__":
-    iface.launch()
+    main()
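
One caveat that applies to both the old CSS-selector approach and the new class-based one: Naver blog posts are typically rendered inside an iframe (id "mainFrame") on the desktop page, so a plain GET of the top-level blog URL often returns a shell page without the post body. The sketch below is illustrative only, not part of this commit; the mainFrame iframe and the SmartEditor class names se-title-text and se-main-container are assumptions about Naver's current markup and may change.

# Illustrative sketch (not part of this commit). Assumes Naver's desktop
# pages embed the post in <iframe id="mainFrame"> and that SmartEditor ONE
# posts use the se-title-text / se-main-container classes; both may change.
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

HEADERS = {"User-Agent": "Mozilla/5.0"}  # some endpoints reject bare requests

def fetch_post_html(url):
    """Follow the mainFrame iframe, if present, and return the post page HTML."""
    response = requests.get(url, headers=HEADERS)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    frame = soup.find("iframe", id="mainFrame")
    if frame and frame.get("src"):
        # The actual post lives at the iframe's src, relative to the blog URL.
        response = requests.get(urljoin(url, frame["src"]), headers=HEADERS)
        response.raise_for_status()
    return response.text

def parse_post(html):
    """Extract (title, content) from assumed SmartEditor ONE markup."""
    soup = BeautifulSoup(html, "html.parser")
    title_div = soup.select_one("div.se-title-text")
    body_div = soup.select_one("div.se-main-container")
    title = title_div.get_text(strip=True) if title_div else "Title not found."
    content = body_div.get_text("\n", strip=True) if body_div else "Content not found."
    return title, content

With helpers like these, scrape_naver_blog could reduce to parse_post(fetch_post_html(url)). That would also restore the User-Agent header this commit drops, and se-main-container would capture the whole post body rather than only the se-quote blocks the new code targets.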