# News_AI / app.py
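# Dependencies inferred from the imports below (this list is an assumption, not a
# checked requirements file): gradio, torch, numpy, transformers, accelerate,
# bitsandbytes, openai-whisper, huggingface_hub, pydub, requests, beautifulsoup4,
# PyMuPDF, python-docx, pandas, openpyxl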
import os
import gradio as gr
import logging
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import whisper
from huggingface_hub import login
from pydub import AudioSegment
import requests
from bs4 import BeautifulSoup
from typing import Optional, Dict, Any
import fitz # PyMuPDF
# Logging configuration
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Hugging Face authentication (read the token from the environment instead of hard-coding it)
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
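# Note: on Hugging Face Spaces, HF_TOKEN can be stored as a repository secret
# (Settings -> Variables and secrets); secrets are exposed to the app as
# environment variables, so os.getenv("HF_TOKEN") picks it up automatically.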
class NewsGenerator:
def __init__(self):
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.whisper_model = None
self.llm_model = None
self.tokenizer = None
self._load_models()
    def _load_models(self):
        """Load the models, using 4-bit quantization when a GPU is available."""
        try:
            # Llama-2 7B Chat model
            model_name = "meta-llama/Llama-2-7b-chat-hf"
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                use_fast=True,
                token=HF_TOKEN
            )

            # 4-bit quantization via bitsandbytes (only meaningful on CUDA)
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16
            ) if self.device == "cuda" else None

            self.llm_model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                torch_dtype=torch.float16,
                quantization_config=quantization_config,
                low_cpu_mem_usage=True,
                token=HF_TOKEN
            )

            # Whisper configuration: smaller model on CPU, larger on GPU
            self.whisper_model = whisper.load_model(
                "small.en" if self.device == "cpu" else "medium",
                device=self.device
            )
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise
    def transcribe_audio(self, audio_path: str) -> str:
        """Audio transcription with error handling."""
        try:
            result = self.whisper_model.transcribe(audio_path)
            return result.get("text", "")
        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return ""
    def generate_news(self, prompt: str, max_length: int = 512) -> str:
        """News generation with Llama-2."""
        try:
            # Llama-2 chat format: [INST] <<SYS>> system prompt <</SYS>> user prompt [/INST]
            inputs = self.tokenizer(
                f"[INST] <<SYS>>\nYou are a professional journalist. Write a well-structured news article based on the following data.\n<</SYS>>\n\n{prompt} [/INST]",
                return_tensors="pt"
            ).to(self.device)
            outputs = self.llm_model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )
            # Decode only the newly generated tokens so the prompt is not echoed back
            generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
            return self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
        except Exception as e:
            logger.error(f"Error generating article: {str(e)}")
            return "Generation error"
def read_document(file_path: str) -> str:
    """Read text from PDF, DOCX, XLSX, or CSV documents."""
    try:
        if file_path.endswith(".pdf"):
            with fitz.open(file_path) as doc:
                return " ".join(page.get_text() for page in doc)
        elif file_path.endswith(".docx"):
            from docx import Document
            return " ".join(p.text for p in Document(file_path).paragraphs)
        elif file_path.endswith(".xlsx"):
            import pandas as pd
            return pd.read_excel(file_path).to_string()
        elif file_path.endswith(".csv"):
            import pandas as pd
            return pd.read_csv(file_path).to_string()
        return ""
    except Exception as e:
        logger.error(f"Error reading document: {str(e)}")
        return ""
def read_url(url: str) -> str:
    """Extract text content from a web page."""
    try:
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        return BeautifulSoup(response.content, 'html.parser').get_text(separator=' ', strip=True)
    except Exception as e:
        logger.error(f"Error reading URL: {str(e)}")
        return ""
def process_social_media(url: str) -> Dict[str, Any]:
    """Process social media content (text only for now; no video download)."""
    try:
        text = read_url(url)
        return {"text": text, "video": None}
    except Exception as e:
        logger.error(f"Error processing social media content: {str(e)}")
        return {"text": "", "video": None}
def create_interface():
    """Gradio user interface."""
    generator = NewsGenerator()

    with gr.Blocks(title="AI News Generator", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 📰 Professional News Generator")

        with gr.Row():
            with gr.Column(scale=3):
                main_input = gr.Textbox(
                    label="Main topic",
                    placeholder="Enter the main topic or instructions...",
                    lines=3
                )
                additional_data = gr.Textbox(
                    label="Additional data",
                    placeholder="Key facts, names, dates, etc...",
                    lines=3
                )

                with gr.Accordion("Additional sources", open=False):
                    doc_upload = gr.File(
                        label="Upload document",
                        file_types=[".pdf", ".docx", ".xlsx", ".csv"]
                    )
                    audio_upload = gr.File(
                        label="Upload audio/video",
                        file_types=["audio", "video"]
                    )
                    url_input = gr.Textbox(
                        label="Reference URL",
                        placeholder="https://..."
                    )
                    social_input = gr.Textbox(
                        label="Social media URL",
                        placeholder="https://..."
                    )

                length_slider = gr.Slider(
                    100, 1000, value=400,
                    label="Article length (words)"
                )
                tone_select = gr.Dropdown(
                    label="Journalistic tone",
                    choices=["Formal", "Neutral", "Investigative", "Narrative"],
                    value="Neutral"
                )

            with gr.Column(scale=2):
                output_news = gr.Textbox(
                    label="Generated article",
                    lines=18,
                    interactive=False
                )
                generate_btn = gr.Button("Generate Article", variant="primary")
                status = gr.Textbox(label="Status", interactive=False)
        def process_and_generate(
            main_input: str,
            additional_data: str,
            document: Optional[str],
            audio: Optional[str],
            url: Optional[str],
            social_url: Optional[str],
            length: int,
            tone: str
        ):
            try:
                # Gather content from the optional sources
                doc_content = read_document(document) if document else ""
                audio_content = generator.transcribe_audio(audio) if audio else ""
                url_content = read_url(url) if url else ""
                social_content = process_social_media(social_url) if social_url else {"text": ""}

                # Build a structured prompt; source excerpts are truncated so the
                # prompt stays within the model's context window
                prompt = f"""
                ## Instructions:
                - Main topic: {main_input}
                - Provided data: {additional_data}
                - Required tone: {tone}

                ## Sources:
                - Document: {doc_content[:1000]}...
                - Audio: {audio_content[:500]}...
                - URL: {url_content[:1000]}...
                - Social media: {social_content['text'][:500]}...

                ## Requirements:
                - Professional structure (headline, lead, body)
                - Cover the 5 Ws
                - Relevant quotes where applicable
                - Length: {length} words
                """
                # The requested word count doubles as the max_new_tokens budget (rough approximation)
                return generator.generate_news(prompt, length), "✅ Generation successful"
            except Exception as e:
                logger.error(str(e))
                return f"Error: {str(e)}", "❌ Generation error"
generate_btn.click(
fn=process_and_generate,
inputs=[
main_input,
additional_data,
doc_upload,
audio_upload,
url_input,
social_input,
length_slider,
tone_select
],
outputs=[output_news, status]
)
return app
if __name__ == "__main__":
app = create_interface()
app.launch(
server_name="0.0.0.0",
server_port=7860,
share=False,
show_error=True
)
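# Local usage (a sketch, assuming the dependencies listed at the top are installed
# and HF_TOKEN is set in the environment): run `python app.py` and open
# http://localhost:7860. On a Hugging Face Space using the Gradio SDK, app.py is
# launched automatically.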