Spaces:

kijeoung
/

blogcatch250113test

Sleeping

App Files Files Community

kijeoung committed on Jan 13

Commit

b79a379

verified ·

1 Parent(s): 838fd6a

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -30

app.py CHANGED Viewed

@@ -1,40 +1,39 @@
-from flask import Flask, request, jsonify
-from bs4 import BeautifulSoup
 import requests
-# Flask application object; the /scrape route below is registered on it.
-app = Flask(__name__)
 def scrape_naver_blog(url):
     try:
-        # Send a GET request to the URL
         response = requests.get(url)
-        response.raise_for_status()
-        # Parse the HTML content
-        soup = BeautifulSoup(response.text, 'html.parser')
-        # Extract title using the provided XPath-like logic
-        title_element = soup.select_one('div.se-module.se-module-text span.se-fs- se-ff-')
-        title = title_element.get_text(strip=True) if title_element else "Title not found"
-        # Extract content using the provided XPath-like logic
-        content_element = soup.select_one('div.se-main-container')
-        content = content_element.get_text(strip=True) if content_element else "Content not found"
-        return {"title": title, "content": content}
-    except Exception as e:
-        return {"error": str(e)}
-@app.route('/scrape', methods=['POST'])
-def scrape():
-    data = request.json
-    url = data.get('url')
-    if not url:
-        return jsonify({"error": "URL is required"}), 400
-    result = scrape_naver_blog(url)
-    return jsonify(result)
-# Start the Flask server only when this file is executed directly.
-# NOTE(review): debug=True turns on Flask's debug mode; presumably meant for
-# local development only - confirm before exposing this publicly.
-if __name__ == '__main__':
-    app.run(debug=True)

 import requests
+from bs4 import BeautifulSoup
+import gradio as gr
 def scrape_naver_blog(url):
     try:
+        # 네이버 블로그 페이지 요청
         response = requests.get(url)
+        response.raise_for_status()  # HTTP 오류 확인
+        # BeautifulSoup을 사용하여 HTML 파싱
+        soup = BeautifulSoup(response.text, 'lxml')
+        # 제목 추출
+        title_xpath = '/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[1]/div/div/div[1]'
+        title_element = soup.select_one('html body div div div div div div div div div div div table tbody tr td div div div div div div div')
+        title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없습니다."
+        # 내용 추출
+        content_xpath = '/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[3]/div[4]'
+        content_element = soup.select_one('html body div div div div div div div div div div div table tbody tr td div div div div')
+        content = content_element.get_text(strip=True) if content_element else "내용을 찾을 수 없습니다."
+        return f"제목: {title}\n내용: {content}"
+    except Exception as e:
+        return f"오류 발생: {str(e)}"
+# Gradio 인터페이스 설정
+iface = gr.Interface(
+    fn=scrape_naver_blog,
+    inputs="text",
+    outputs="text",
+    title="네이버 블로그 스크래핑",
+    description="네이버 블로그 URL을 입력하면 제목과 내용을 스크래핑합니다."
+)
+# Gradio 앱 실행
+iface.launch()