Spaces:

aliceblue11
/

naver_blog_00

Sleeping

App Files Files Community

naver_blog_00 / app.py

aliceblue11

Update app.py

bf00196 verified about 2 months ago

raw

history blame contribute delete

2.87 kB

	import requests
	from bs4 import BeautifulSoup
	import gradio as gr
	import random
	import time

	def convert_to_mobile_url(url):
	    """Convert a desktop Naver Blog URL to its mobile (m.blog.naver.com) form.

	    Recognized desktop forms (``http`` or ``https``, host ``blog.naver.com``):

	    * ``/{blogId}/{logNo}`` (a trailing slash is tolerated)
	    * ``/PostView.naver?blogId={blogId}&logNo={logNo}``
	    * ``/{blogId}?Redirect=Log&logNo={logNo}``
	    * ``/{blogId}`` (blog home, no post number)

	    Any query string or fragment on the input is dropped from the result.

	    Args:
	        url: The URL entered by the user.

	    Returns:
	        ``https://m.blog.naver.com/{blogId}[/{logNo}]`` for a recognized
	        desktop URL; otherwise ``url`` unchanged (for example when it is
	        already a mobile URL or points at another host).
	    """
	    # Function-scope import so this block does not depend on the file header.
	    from urllib.parse import parse_qs, urlsplit

	    parts = urlsplit(url.strip())
	    if parts.scheme not in ("http", "https") or parts.hostname != "blog.naver.com":
	        return url  # Already mobile, or not a Naver Blog URL: return as-is.

	    # Ignore empty segments so a trailing slash does not shift the indices.
	    segments = [segment for segment in parts.path.split("/") if segment]

	    if len(segments) >= 2:
	        blog_id, post_id = segments[-2], segments[-1]
	    else:
	        # Query-style links carry the ids in blogId / logNo parameters.
	        query = parse_qs(parts.query)
	        if "blogId" in query:
	            blog_id = query["blogId"][0]
	        else:
	            blog_id = segments[0] if segments else ""
	        post_id = query["logNo"][0] if "logNo" in query else ""

	    if not blog_id:
	        return url  # Nothing identifiable to convert.

	    mobile_url = f"https://m.blog.naver.com/{blog_id}"
	    if post_id:
	        mobile_url = f"{mobile_url}/{post_id}"
	    return mobile_url

	def scrape_naver_blog(url, timeout=10):
	    """Fetch a Naver Blog post and return its title and text as one string.

	    The URL is first converted to its mobile form, then requested with
	    browser-like headers after a random 5-8 second pause. The HTML is parsed
	    with BeautifulSoup and the text of the title and text modules is
	    collected; images are not included.

	    Args:
	        url: Desktop or mobile Naver Blog post URL.
	        timeout: Seconds to wait for the server before giving up. Without a
	            timeout, ``requests.get`` can block indefinitely on a stalled
	            connection.

	    Returns:
	        ``"제목: <title>\\n내용: <content>"`` on success, or
	        ``"오류 발생: <error>"`` if anything fails. This function never
	        raises; errors are reported through the returned text so the UI can
	        display them.
	    """
	    try:
	        # Convert a desktop URL to the mobile URL.
	        url = convert_to_mobile_url(url)

	        # Headers sent with the HTTP request.
	        headers = {
	            "User-Agent": (
	                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
	                "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
	            ),
	            "Referer": "https://www.naver.com/",  # Present the Naver main page as the referrer.
	        }

	        # Pause 5-8 seconds before the request.
	        delay = random.uniform(5, 8)
	        time.sleep(delay)

	        # Send the GET request; the timeout bounds how long we wait on the server.
	        response = requests.get(url, headers=headers, timeout=timeout)
	        response.raise_for_status()  # Raise on HTTP error status codes.

	        # Parse the HTML.
	        soup = BeautifulSoup(response.text, 'html.parser')

	        # Extract the title.
	        title_div = soup.find('div', class_='se-module se-module-text se-title-text')
	        title = title_div.get_text(strip=True) if title_div else "제목을 찾을 수 없습니다."

	        # Extract text content (images excluded).
	        # NOTE(review): a multi-class string passed to class_ is matched against
	        # the exact class attribute value, so this presumably skips the title
	        # module (which carries an extra class) - confirm against live markup.
	        text_components = soup.find_all('div', class_='se-module se-module-text')
	        content = "\n".join(component.get_text(strip=True) for component in text_components)

	        return f"제목: {title}\n내용: {content}"

	    except Exception as e:
	        # UI boundary: report any failure as text instead of raising.
	        return f"오류 발생: {e}"

	# Callback function for the Gradio interface.
	def gradio_interface(url):
	    """Return the scraped title and text for the blog URL typed into the UI.

	    Delegates to ``scrape_naver_blog`` and passes its result through unchanged.
	    """
	    scraped_text = scrape_naver_blog(url)
	    return scraped_text

	# Build the Gradio interface: one URL text box in, one text box of scraped content out.
	iface = gr.Interface(
	    title="네이버 블로그 스크래퍼 (텍스트만)",
	    description=(
	        "네이버 블로그 URL(표준 또는 모바일)을 입력하면 제목과 텍스트 내용을 스크래핑합니다. "
	        "스크립트는 표준 URL을 자동으로 모바일 형식으로 변환하며, 헤더와 5~8초 지연을 설정하여 요청을 자연스럽게 만듭니다."
	    ),
	    fn=gradio_interface,
	    inputs=gr.Textbox(label="네이버 블로그 URL 입력 (표준 또는 모바일)"),
	    outputs=gr.Textbox(label="스크래핑된 블로그 내용"),
	    # Compact Gradio theme.
	    # NOTE(review): confirm the installed Gradio version still recognizes this theme name.
	    theme="compact",
	)

	# Launch the Gradio app only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch()