kijeoung committed
Commit cd4f45f · verified · 1 Parent(s): 83f2766

Update app.py

Files changed (1)
app.py +36 -38
app.py CHANGED
@@ -1,49 +1,47 @@
+# app.py
 import gradio as gr
-from bs4 import BeautifulSoup
 import requests
+from bs4 import BeautifulSoup
 
 def scrape_naver_blog(url):
     try:
-        # Request the webpage
-        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
+        # Send the HTTP request
+        response = requests.get(url)
         response.raise_for_status()
-
-        # Parse the page with BeautifulSoup
-        soup = BeautifulSoup(response.content, 'html.parser')
-
-        # Extract the title
-        title_element = soup.select_one(
-            "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div > div:nth-of-type(1)"
-        )
-        title = title_element.get_text(strip=True) if title_element else "Title not found"
-
-        # Extract the content
-        content_element = soup.select_one(
-            "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(3) > div:nth-of-type(4)"
-        )
-        content = content_element.get_text(strip=True) if content_element else "Content not found"
-
-        return {"title": title, "content": content}
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        # Scrape the title
+        title_div = soup.find('div', class_='se-module se-module-text se-title-text')
+        if title_div:
+            title = title_div.get_text(strip=True)
+        else:
+            title = "Title not found."
+
+        # Scrape the content
+        content_div = soup.find('div', class_='se-module se-module-text se-quote')
+        if content_div:
+            content = "\n".join(p.get_text(strip=True) for p in content_div.find_all('p'))
+        else:
+            content = "Content not found."
+
+        return f"Title:\n{title}\n\nContent:\n{content}"
 
     except requests.exceptions.RequestException as e:
-        return {"error": f"Request failed: {e}"}
+        return f"HTTP request error: {e}"
     except Exception as e:
-        return {"error": f"An error occurred: {e}"}
-
-# Define Gradio interface
-def display_scraper(url):
-    result = scrape_naver_blog(url)
-    if "error" in result:
-        return result["error"]
-    return f"Title:\n{result['title']}\n\nContent:\n{result['content']}"
-
-iface = gr.Interface(
-    fn=display_scraper,
-    inputs=gr.Textbox(label="Naver blog URL"),
-    outputs=gr.Textbox(label="Scraping result"),
-    title="Naver Blog Scraper",
-    description="Enter a Naver blog URL to extract the blog title and content."
-)
+
+# Define the Gradio interface
+def main():
+    gr.Interface(
+        fn=scrape_naver_blog,
+        inputs=gr.Textbox(label="Enter a Naver blog URL"),
+        outputs=gr.Textbox(label="Scraping result"),
+        title="Naver Blog Scraper",
+        description="Scrapes the title and content from a Naver blog. Enter a URL."
+    ).launch()
 
 if __name__ == "__main__":
-    iface.launch()
+    main()
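
One caveat that applies to both the old CSS-selector approach and the new class-based one: Naver blog posts are typically rendered inside an iframe (id "mainFrame") on the desktop page, so a plain GET of the top-level blog URL often returns a shell page without the post body. The sketch below is illustrative only, not part of this commit; the mainFrame iframe and the SmartEditor class names se-title-text and se-main-container are assumptions about Naver's current markup and may change.

# Illustrative sketch (not part of this commit). Assumes Naver's desktop
# pages embed the post in <iframe id="mainFrame"> and that SmartEditor ONE
# posts use the se-title-text / se-main-container classes; both may change.
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

HEADERS = {"User-Agent": "Mozilla/5.0"}  # some endpoints reject bare requests

def fetch_post_html(url):
    """Follow the mainFrame iframe, if present, and return the post page HTML."""
    response = requests.get(url, headers=HEADERS)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    frame = soup.find("iframe", id="mainFrame")
    if frame and frame.get("src"):
        # The actual post lives at the iframe's src, relative to the blog URL.
        response = requests.get(urljoin(url, frame["src"]), headers=HEADERS)
        response.raise_for_status()
    return response.text

def parse_post(html):
    """Extract (title, content) from assumed SmartEditor ONE markup."""
    soup = BeautifulSoup(html, "html.parser")
    title_div = soup.select_one("div.se-title-text")
    body_div = soup.select_one("div.se-main-container")
    title = title_div.get_text(strip=True) if title_div else "Title not found."
    content = body_div.get_text("\n", strip=True) if body_div else "Content not found."
    return title, content

With helpers like these, scrape_naver_blog could reduce to parse_post(fetch_post_html(url)). That would also restore the User-Agent header this commit drops, and se-main-container would capture the whole post body rather than only the se-quote blocks the new code targets.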