kijeoung committed on
Commit
83f2766
·
verified ·
1 Parent(s): dbacafc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -30
app.py CHANGED
@@ -1,39 +1,49 @@
1
- import requests
2
- from bs4 import BeautifulSoup
3
  import gradio as gr
 
 
4
 
5
  def scrape_naver_blog(url):
6
  try:
7
- # 네이버 블로그 페이지 요청
8
- response = requests.get(url)
9
- response.raise_for_status() # HTTP 오류 확인
10
-
11
- # BeautifulSoup์„ ์‚ฌ์šฉํ•˜์—ฌ HTML ํŒŒ์‹ฑ
12
- soup = BeautifulSoup(response.text, 'lxml')
13
-
14
- # 제목 추출
15
- title_xpath = '/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[1]/div/div/div[1]'
16
- title_element = soup.select_one('html body div div div div div div div div div div div table tbody tr td div div div div div div div')
17
- title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없습니다."
18
-
19
- # 내용 추출
20
- content_xpath = '/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[3]/div[4]'
21
- content_element = soup.select_one('html body div div div div div div div div div div div table tbody tr td div div div div')
22
- content = content_element.get_text(strip=True) if content_element else "내용을 찾을 수 없습니다."
23
-
24
- return f"제목: {title}\n내용: {content}"
25
-
 
 
 
 
26
  except Exception as e:
27
- return f"오류 발생: {str(e)}"
 
 
 
 
 
 
 
28
 
29
- # Gradio 인터페이스 설정
30
  iface = gr.Interface(
31
- fn=scrape_naver_blog,
32
- inputs="text",
33
- outputs="text",
34
- title="네이버 블로그 스크래핑",
35
- description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL์„ ์ž…๋ ฅํ•˜๋ฉด ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค."
36
  )
37
 
38
- # Gradio 앱 실행
39
- iface.launch()
 
 
 
1
  import gradio as gr
2
+ from bs4 import BeautifulSoup
3
+ import requests
4
 
5
  def scrape_naver_blog(url):
6
  try:
7
+ # Request the webpage
8
+ response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
9
+ response.raise_for_status()
10
+
11
+ # Parse the page with BeautifulSoup
12
+ soup = BeautifulSoup(response.content, 'html.parser')
13
+
14
+ # Extract the title
15
+ title_element = soup.select_one(
16
+ "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div > div:nth-of-type(1)"
17
+ )
18
+ title = title_element.get_text(strip=True) if title_element else "Title not found"
19
+
20
+ # Extract the content
21
+ content_element = soup.select_one(
22
+ "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(3) > div:nth-of-type(4)"
23
+ )
24
+ content = content_element.get_text(strip=True) if content_element else "Content not found"
25
+
26
+ return {"제목": title, "내용": content}
27
+
28
+ except requests.exceptions.RequestException as e:
29
+ return {"error": f"Request failed: {e}"}
30
  except Exception as e:
31
+ return {"error": f"An error occurred: {e}"}
32
+
33
+ # Define Gradio interface
34
+ def display_scraper(url):
35
+ result = scrape_naver_blog(url)
36
+ if "error" in result:
37
+ return result["error"]
38
+ return f"제목:\n{result['제목']}\n\n내용:\n{result['내용']}"
39
 
 
40
  iface = gr.Interface(
41
+ fn=display_scraper,
42
+ inputs=gr.Textbox(label="네이버 블로그 URL"),
43
+ outputs=gr.Textbox(label="스크래핑 결과"),
44
+ title="네이버 블로그 스크래퍼",
45
+ description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL์„ ์ž…๋ ฅํ•˜๋ฉด ๋ธ”๋กœ๊ทธ ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค."
46
  )
47
 
48
+ if __name__ == "__main__":
49
+ iface.launch()