Spaces:

kijeoung
/

blogcatch250113test

Sleeping

App Files Files Community

kijeoung committed on Jan 13

Commit

83f2766

verified ·

1 Parent(s): dbacafc

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -30

app.py CHANGED Viewed

@@ -1,39 +1,49 @@
-import requests
-from bs4 import BeautifulSoup
 import gradio as gr
 def scrape_naver_blog(url):
     try:
-        # Request the Naver blog page
-        response = requests.get(url)
-        response.raise_for_status()  # Check for HTTP errors
-        # Parse the HTML with BeautifulSoup
-        soup = BeautifulSoup(response.text, 'lxml')
-        # Extract the title
-        title_xpath = '/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[1]/div/div/div[1]'  # never read below; the CSS selector on the next line is what is actually used
-        title_element = soup.select_one('html body div div div div div div div div div div div table tbody tr td div div div div div div div')
-        title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없습니다."
-        # Extract the content
-        content_xpath = '/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[3]/div[4]'  # never read below; the CSS selector on the next line is what is actually used
-        content_element = soup.select_one('html body div div div div div div div div div div div table tbody tr td div div div div')
-        content = content_element.get_text(strip=True) if content_element else "내용을 찾을 수 없습니다."
-        return f"제목: {title}\n내용: {content}"
     except Exception as e:
-        return f"오류 발생: {str(e)}"
-# Configure the Gradio interface
 iface = gr.Interface(
-    fn=scrape_naver_blog,
-    inputs="text",
-    outputs="text",
-    title="네이버 블로그 스크래핑",
-    description="네이버 블로그 URL을 입력하면 제목과 내용을 스크래핑합니다."
 )
-# Launch the Gradio app
-iface.launch()

 import gradio as gr
+from bs4 import BeautifulSoup
+import requests
 def scrape_naver_blog(url):
     try:
+        # Request the webpage
+        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
+        response.raise_for_status()
+        # Parse the page with BeautifulSoup
+        soup = BeautifulSoup(response.content, 'html.parser')
+        # Extract the title
+        title_element = soup.select_one(
+            "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div > div:nth-of-type(1)"
+        )
+        title = title_element.get_text(strip=True) if title_element else "Title not found"
+        # Extract the content
+        content_element = soup.select_one(
+            "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(3) > div:nth-of-type(4)"
+        )
+        content = content_element.get_text(strip=True) if content_element else "Content not found"
+        return {"제목": title, "내용": content}
+    except requests.exceptions.RequestException as e:
+        return {"error": f"Request failed: {e}"}
     except Exception as e:
+        return {"error": f"An error occurred: {e}"}
+# Define Gradio interface
+def display_scraper(url):
+    result = scrape_naver_blog(url)
+    if "error" in result:
+        return result["error"]
+    return f"제목:\n{result['제목']}\n\n내용:\n{result['내용']}"
+# Gradio UI: one URL textbox in, one result textbox out, with
+# display_scraper as the handler that produces the displayed text.
 iface = gr.Interface(
+    fn=display_scraper,
+    inputs=gr.Textbox(label="네이버 블로그 URL"),
+    outputs=gr.Textbox(label="스크래핑 결과"),
+    title="네이버 블로그 스크래퍼",
+    description="네이버 블로그 URL을 입력하면 블로그 제목과 내용을 추출합니다."
 )
+# Start the app only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    iface.launch()