kijeoung committed on
Commit
b79a379
·
verified ·
1 Parent(s): 838fd6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -30
app.py CHANGED
@@ -1,40 +1,39 @@
1
from flask import Flask, request, jsonify
from bs4 import BeautifulSoup
import requests

app = Flask(__name__)

def scrape_naver_blog(url):
    """Fetch a Naver blog post and return its title and content.

    Returns a dict {"title": ..., "content": ...} on success, or
    {"error": <message>} on any failure (network or parsing).
    """
    try:
        # Send a GET request to the URL
        response = requests.get(url)
        response.raise_for_status()

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract title using the provided XPath-like logic
        # NOTE(review): 'span.se-fs- se-ff-' parses as "a descendant tag named
        # se-ff- inside span.se-fs-" — this looks like a mangled class selector
        # and likely never matches; verify against a real Naver blog page.
        title_element = soup.select_one('div.se-module.se-module-text span.se-fs- se-ff-')
        title = title_element.get_text(strip=True) if title_element else "Title not found"

        # Extract content using the provided XPath-like logic
        # div.se-main-container is the SmartEditor post-body wrapper.
        content_element = soup.select_one('div.se-main-container')
        content = content_element.get_text(strip=True) if content_element else "Content not found"

        return {"title": title, "content": content}

    except Exception as e:
        # Best-effort API: report the failure in the payload instead of raising.
        return {"error": str(e)}

@app.route('/scrape', methods=['POST'])
def scrape():
    """POST /scrape with JSON body {"url": ...}; returns the scraped result as JSON."""
    data = request.json
    url = data.get('url')
    if not url:
        # Missing required field -> 400 Bad Request.
        return jsonify({"error": "URL is required"}), 400

    result = scrape_naver_blog(url)
    return jsonify(result)

if __name__ == '__main__':
    # Debug server for local development only.
    app.run(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import requests
2
+ from bs4 import BeautifulSoup
3
+ import gradio as gr
4
 
5
def scrape_naver_blog(url):
    """Fetch a Naver blog post and return its title and body text.

    Parameters
    ----------
    url : str
        URL of the Naver blog post to scrape.

    Returns
    -------
    str
        "title / content" text on success, or a user-facing error message
        if the request or parsing fails.
    """
    try:
        # Request the Naver blog page; timeout prevents the UI from hanging
        # forever on an unresponsive host.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise on HTTP 4xx/5xx

        # Parse with the stdlib parser — no extra lxml dependency needed.
        soup = BeautifulSoup(response.text, 'html.parser')

        # Title: prefer the Open Graph meta tag (stable across blog layouts),
        # then fall back to the SmartEditor title container.
        # (The previous bare 'html body div div ...' descendant chains matched
        # an arbitrary first deep element, not the title/content.)
        title_meta = soup.find('meta', property='og:title')
        if title_meta and title_meta.get('content'):
            title = title_meta['content'].strip()
        else:
            title_element = soup.select_one('div.se-title-text')
            title = title_element.get_text(strip=True) if title_element else "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

        # Body: SmartEditor posts wrap the content in div.se-main-container.
        content_element = soup.select_one('div.se-main-container')
        content = content_element.get_text(strip=True) if content_element else "๋‚ด์šฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

        return f"์ œ๋ชฉ: {title}\n๋‚ด์šฉ: {content}"

    except Exception as e:
        # Surface any failure (network, HTTP error, parsing) as a message
        # the Gradio text output can display.
        return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
28
+
29
+ # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์„ค์ •
30
+ iface = gr.Interface(
31
+ fn=scrape_naver_blog,
32
+ inputs="text",
33
+ outputs="text",
34
+ title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํ•‘",
35
+ description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL์„ ์ž…๋ ฅํ•˜๋ฉด ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค."
36
+ )
37
+
38
+ # Gradio ์•ฑ ์‹คํ–‰
39
+ iface.launch()