import logging
from typing import Optional

import gradio as gr
import torch
import whisper
from transformers import AutoTokenizer, pipeline

# Basic logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


class NewsGenerator:
    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.whisper_model = None
        self.news_pipeline = None
        self.tokenizer = None

        # Load models once at startup
        self._load_models()

    def _load_models(self):
        """Load models with memory-conscious settings."""
        try:
            # Compact, efficient summarization model
            model_name = "facebook/bart-large-cnn"
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.news_pipeline = pipeline(
                "summarization",
                model=model_name,
                tokenizer=self.tokenizer,
                device=self.device,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            )

            # Whisper model sized to the available hardware
            self.whisper_model = whisper.load_model(
                "tiny.en" if self.device == "cpu" else "small",
                device=self.device,
            )
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def transcribe_audio(self, audio_path: str) -> str:
        """Transcribe an audio file with Whisper."""
        try:
            result = self.whisper_model.transcribe(audio_path)
            return result.get("text", "")
        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return ""

    def generate_news(self, inputs: str, max_length: int = 200) -> str:
        """Generate the news text with bounded resource usage."""
        try:
            return self.news_pipeline(
                inputs,
                max_length=max_length,
                min_length=30,
                do_sample=False,  # deterministic and faster
                truncation=True,
            )[0]["summary_text"]
        except Exception as e:
            logger.error(f"Generation error: {str(e)}")
            return "Error generating content"


def read_document(file_path: str) -> str:
    """Extract plain text from PDF, DOCX, XLSX or CSV documents."""
    try:
        if file_path.endswith(".pdf"):
            import fitz  # PyMuPDF

            with fitz.open(file_path) as doc:
                return " ".join(page.get_text() for page in doc)
        elif file_path.endswith(".docx"):
            from docx import Document

            return " ".join(p.text for p in Document(file_path).paragraphs)
        elif file_path.endswith((".xlsx", ".csv")):
            import pandas as pd

            if file_path.endswith(".xlsx"):
                return pd.read_excel(file_path).to_string()
            return pd.read_csv(file_path).to_string()
        return ""
    except Exception as e:
        logger.error(f"Document error: {str(e)}")
        return ""


def create_interface():
    """Build the Gradio interface."""
    generator = NewsGenerator()

    with gr.Blocks(title="Efficient News Generator") as app:
        gr.Markdown("## 📰 Optimized News Generator")

        with gr.Row():
            with gr.Column(scale=2):
                inputs = gr.Textbox(label="Main Input", lines=5)
                max_length = gr.Slider(100, 500, value=200, label="Maximum Length")
                generate_btn = gr.Button("Generate News", variant="primary")

            with gr.Column(scale=1):
                doc_upload = gr.File(
                    label="Upload Document",
                    file_types=[".pdf", ".docx", ".xlsx", ".csv"],
                )
                audio_upload = gr.File(
                    label="Upload Audio",
                    file_types=["audio", "video"],
                )

        output = gr.Textbox(label="Generated News", lines=10, interactive=False)

        def process_inputs(
            main_input: str,
            document: Optional[str],
            audio: Optional[str],
            max_len: int,
        ):
            try:
                # Extract text from the uploaded document and audio, if any
                doc_content = read_document(document) if document else ""
                audio_content = generator.transcribe_audio(audio) if audio else ""

                # Build the prompt passed to the summarizer
                full_input = "\n".join([
                    main_input,
                    f"Document: {doc_content}",
                    f"Audio: {audio_content}",
                ])

                return generator.generate_news(full_input, max_len)
            except Exception as e:
                logger.error(f"Processing error: {str(e)}")
                return f"Error: {str(e)}"

        generate_btn.click(
            fn=process_inputs,
            inputs=[inputs, doc_upload, audio_upload, max_length],
            outputs=output,
        )

    return app


if __name__ == "__main__":
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )
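

# ---------------------------------------------------------------------------
# Environment notes (a sketch, not part of the original script; package names
# are assumptions inferred from the imports above):
#   pip install gradio torch transformers openai-whisper pymupdf python-docx \
#               pandas openpyxl
# Whisper also needs ffmpeg available on the PATH to decode audio files.
# Launch this file with the Python interpreter and open http://localhost:7860
# (the port configured in app.launch above).
# ---------------------------------------------------------------------------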