"""Gradio app that drafts news articles with Llama-2 from mixed sources.

Sources may be free text, an uploaded document, an audio/video file
(transcribed with Whisper), a reference URL and a social-media URL.
"""

import logging
import os
from pathlib import Path
from typing import Any, Dict, Optional

import fitz  # PyMuPDF
import gradio as gr
import numpy as np
import requests
import torch
import whisper
from bs4 import BeautifulSoup
from huggingface_hub import login
from pydub import AudioSegment
from transformers import AutoTokenizer, AutoModelForCausalLM

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Hugging Face authentication. The token is taken from the HF_TOKEN
# environment variable so it does not have to live in source control; the
# original placeholder literal is kept as the fallback value.
HF_TOKEN = os.environ.get("HF_TOKEN", "hf_tu_token_aqui")
login(token=HF_TOKEN)

# Rough words -> tokens conversion used to size the generation budget.
# Heuristic only (sub-word tokenizers emit more than one token per word);
# tune it if articles come out truncated or too long.
_TOKENS_PER_WORD = 2


def _as_path(file_obj: Any) -> str:
    """Return a filesystem path for an upload value.

    Accepts a plain path (``str`` / ``os.PathLike``) or a tempfile-like
    object exposing the path in ``.name``. ``None`` yields ``""``.
    """
    if file_obj is None:
        return ""
    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)
    return str(getattr(file_obj, "name", file_obj))


class NewsGenerator:
    """Holds the Whisper and Llama-2 models and exposes the two operations
    the UI needs: audio transcription and article generation."""

    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.whisper_model = None
        self.llm_model = None
        self.tokenizer = None
        self._load_models()

    def _load_models(self):
        """Load the tokenizer, the 4-bit quantized LLM and Whisper.

        Raises:
            Exception: whatever the underlying loaders raise; it is logged
                and re-raised because the app cannot work without models.
        """
        try:
            # Llama-2 7B Chat
            model_name = "meta-llama/Llama-2-7b-chat-hf"
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                use_fast=True,
                token=HF_TOKEN
            )
            self.llm_model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                torch_dtype=torch.float16,
                load_in_4bit=True,
                low_cpu_mem_usage=True,
                token=HF_TOKEN
            )

            # Whisper: use the multilingual "small" checkpoint on CPU. The
            # previous "small.en" is English-only, while this app's prompts
            # and UI are Spanish.
            self.whisper_model = whisper.load_model(
                "small" if self.device == "cpu" else "medium",
                device=self.device
            )
        except Exception as e:
            logger.exception("Error cargando modelos: %s", e)
            raise

    def transcribe_audio(self, audio_path: str) -> str:
        """Transcribe an audio/video file with Whisper.

        Args:
            audio_path: path to the media file (a tempfile-like upload
                object is also accepted).

        Returns:
            The transcribed text, or ``""`` if transcription fails.
        """
        try:
            result = self.whisper_model.transcribe(_as_path(audio_path))
            return result.get("text", "")
        except Exception as e:
            logger.exception("Error en transcripción: %s", e)
            return ""

    def generate_news(self, prompt: str, max_length: int = 512) -> str:
        """Generate an article with Llama-2 chat.

        Args:
            prompt: user-side instructions and source material.
            max_length: maximum number of *new tokens* to generate.

        Returns:
            The generated article only (the prompt is not echoed back), or
            ``"Error en generación"`` if generation fails.
        """
        try:
            # Llama-2 chat format: the system message goes between
            # <<SYS>> and <</SYS>> inside the first [INST] block.
            chat_prompt = (
                "[INST] <<SYS>>\n"
                "Eres un periodista profesional. "
                "Genera una noticia bien estructurada basada en los "
                "siguientes datos:\n"
                "<</SYS>>\n\n"
                f"{prompt} [/INST]"
            )
            inputs = self.tokenizer(
                chat_prompt,
                return_tensors="pt"
            ).to(self.device)

            outputs = self.llm_model.generate(
                **inputs,
                max_new_tokens=int(max_length),
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

            # generate() returns prompt + continuation for decoder-only
            # models; drop the prompt tokens so only the article is shown.
            prompt_len = inputs["input_ids"].shape[-1]
            new_tokens = outputs[0][prompt_len:]
            return self.tokenizer.decode(
                new_tokens, skip_special_tokens=True
            ).strip()
        except Exception as e:
            logger.exception("Error generando noticia: %s", e)
            return "Error en generación"


def read_document(file_path: str) -> str:
    """Extract plain text from a .pdf, .docx, .xlsx or .csv file.

    Args:
        file_path: path to the document (a tempfile-like upload object is
            also accepted). The extension is matched case-insensitively.

    Returns:
        The extracted text; ``""`` for unsupported types or on any error.
    """
    try:
        path = _as_path(file_path)
        suffix = Path(path).suffix.lower()
        if suffix == ".pdf":
            with fitz.open(path) as doc:
                return " ".join(page.get_text() for page in doc)
        if suffix == ".docx":
            from docx import Document
            return " ".join(p.text for p in Document(path).paragraphs)
        if suffix == ".xlsx":
            import pandas as pd
            return pd.read_excel(path).to_string()
        if suffix == ".csv":
            import pandas as pd
            return pd.read_csv(path).to_string()
        return ""
    except Exception as e:
        logger.exception("Error leyendo documento: %s", e)
        return ""


def read_url(url: str) -> str:
    """Download a web page and return its visible text.

    Args:
        url: address to fetch (15 s timeout).

    Returns:
        Whitespace-joined page text, or ``""`` on any error.
    """
    try:
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Script/style bodies are not readable content; remove them so
        # they do not pollute the text handed to the model.
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        return soup.get_text(separator=' ', strip=True)
    except Exception as e:
        logger.exception("Error leyendo URL: %s", e)
        return ""


def process_social_media(url: str) -> Dict[str, Any]:
    """Fetch a social-media URL.

    Returns:
        ``{"text": <page text>, "video": None}``; ``"text"`` is ``""`` on
        failure. ``"video"`` is always ``None`` in this implementation.
    """
    try:
        text = read_url(url)
        return {"text": text, "video": None}
    except Exception as e:
        logger.exception("Error procesando red social: %s", e)
        return {"text": "", "video": None}


def create_interface():
    """Build the Gradio Blocks UI and wire it to a ``NewsGenerator``.

    Models are loaded eagerly here, so this call is slow and may raise if
    loading fails.

    Returns:
        The ``gr.Blocks`` app, ready for ``launch()``.
    """
    generator = NewsGenerator()

    with gr.Blocks(title="Generador de Noticias AI", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 📰 Generador de Noticias Profesional")

        with gr.Row():
            with gr.Column(scale=3):
                main_input = gr.Textbox(
                    label="Tema principal",
                    placeholder="Ingrese el tema o instrucciones principales...",
                    lines=3
                )
                additional_data = gr.Textbox(
                    label="Datos adicionales",
                    placeholder="Hechos clave, nombres, fechas, etc...",
                    lines=3
                )

                with gr.Accordion("Fuentes adicionales", open=False):
                    doc_upload = gr.File(
                        label="Subir documento",
                        file_types=[".pdf", ".docx", ".xlsx", ".csv"]
                    )
                    audio_upload = gr.File(
                        label="Subir audio/video",
                        file_types=["audio", "video"]
                    )
                    url_input = gr.Textbox(
                        label="URL de referencia",
                        placeholder="https://..."
                    )
                    social_input = gr.Textbox(
                        label="URL de red social",
                        placeholder="https://..."
                    )

                length_slider = gr.Slider(
                    100, 1000, value=400,
                    label="Longitud de la noticia (palabras)"
                )
                tone_select = gr.Dropdown(
                    label="Tono periodístico",
                    choices=["Formal", "Neutral", "Investigativo", "Narrativo"],
                    value="Neutral"
                )

            with gr.Column(scale=2):
                output_news = gr.Textbox(
                    label="Noticia generada",
                    lines=18,
                    interactive=False
                )
                generate_btn = gr.Button("Generar Noticia", variant="primary")
                status = gr.Textbox(label="Estado", interactive=False)

        def process_and_generate(
            main_input: str,
            additional_data: str,
            document: Optional[Any],
            audio: Optional[Any],
            url: Optional[str],
            social_url: Optional[str],
            length: int,
            tone: str
        ):
            """Collect every source, build the prompt and run generation.

            Returns:
                ``(article_text, status_message)`` for the two output boxes.
            """
            try:
                main_text = (main_input or "").strip()
                extra_text = (additional_data or "").strip()

                # Gather optional sources; each reader returns "" on failure.
                doc_content = read_document(document) if document else ""
                audio_content = generator.transcribe_audio(audio) if audio else ""
                url_content = read_url(url) if url else ""
                social_content = (
                    process_social_media(social_url) if social_url else {"text": ""}
                )

                # Nothing to write about: do not spend a generation call.
                if not any((main_text, extra_text, doc_content, audio_content,
                            url_content, social_content["text"])):
                    return "", "⚠️ Ingrese un tema o al menos una fuente"

                word_count = int(length)

                # Structured prompt; sources are truncated to bound its size.
                prompt = "\n".join([
                    "## Instrucciones:",
                    f"- Tema principal: {main_text}",
                    f"- Datos proporcionados: {extra_text}",
                    f"- Tono requerido: {tone}",
                    "",
                    "## Fuentes:",
                    f"- Documento: {doc_content[:1000]}...",
                    f"- Audio: {audio_content[:500]}...",
                    f"- URL: {url_content[:1000]}...",
                    f"- Red social: {social_content['text'][:500]}...",
                    "",
                    "## Requisitos:",
                    "- Estructura profesional (titular, lead, cuerpo)",
                    "- Incluir las 5W",
                    "- Citas relevantes si aplica",
                    f"- Longitud: {word_count} palabras",
                ])

                # The slider is expressed in words, generate_news() takes a
                # token budget; convert so the article is not cut short.
                max_tokens = word_count * _TOKENS_PER_WORD
                return generator.generate_news(prompt, max_tokens), "✅ Generación exitosa"

            except Exception as e:
                logger.exception("%s", e)
                return f"Error: {str(e)}", "❌ Error en generación"

        generate_btn.click(
            fn=process_and_generate,
            inputs=[
                main_input,
                additional_data,
                doc_upload,
                audio_upload,
                url_input,
                social_input,
                length_slider,
                tone_select
            ],
            outputs=[output_news, status]
        )

    return app


if __name__ == "__main__":
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )