import logging
from typing import Optional

import gradio as gr
import torch
import whisper
from transformers import AutoTokenizer, pipeline

# Basic logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


class NewsGenerator:
    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.whisper_model = None
        self.news_pipeline = None
        self.tokenizer = None

        # Load models once at startup
        self._load_models()

    def _load_models(self):
        """Load models with memory-conscious settings."""
        try:
            # Compact, efficient summarization model
            model_name = "facebook/bart-large-cnn"
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.news_pipeline = pipeline(
                "summarization",
                model=model_name,
                tokenizer=self.tokenizer,
                device=self.device,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            )

            # Whisper model sized to the available hardware
            self.whisper_model = whisper.load_model(
                "tiny.en" if self.device == "cpu" else "small",
                device=self.device,
            )
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def transcribe_audio(self, audio_path: str) -> str:
        """Transcribe an audio file with Whisper."""
        try:
            result = self.whisper_model.transcribe(audio_path)
            return result.get("text", "")
        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return ""

    def generate_news(self, inputs: str, max_length: int = 200) -> str:
        """Generate the news text with bounded resource usage."""
        try:
            return self.news_pipeline(
                inputs,
                max_length=max_length,
                min_length=30,
                do_sample=False,  # deterministic and faster
                truncation=True,
            )[0]["summary_text"]
        except Exception as e:
            logger.error(f"Generation error: {str(e)}")
            return "Error generating content"


def read_document(file_path: str) -> str:
    """Extract plain text from PDF, DOCX, XLSX or CSV documents."""
    try:
        if file_path.endswith(".pdf"):
            import fitz  # PyMuPDF

            with fitz.open(file_path) as doc:
                return " ".join(page.get_text() for page in doc)
        elif file_path.endswith(".docx"):
            from docx import Document

            return " ".join(p.text for p in Document(file_path).paragraphs)
        elif file_path.endswith((".xlsx", ".csv")):
            import pandas as pd

            if file_path.endswith(".xlsx"):
                return pd.read_excel(file_path).to_string()
            return pd.read_csv(file_path).to_string()
        return ""
    except Exception as e:
        logger.error(f"Document error: {str(e)}")
        return ""


def create_interface():
    """Build the Gradio interface."""
    generator = NewsGenerator()

    with gr.Blocks(title="Efficient News Generator") as app:
        gr.Markdown("## 📰 Optimized News Generator")

        with gr.Row():
            with gr.Column(scale=2):
                inputs = gr.Textbox(label="Main Input", lines=5)
                max_length = gr.Slider(100, 500, value=200, label="Maximum Length")
                generate_btn = gr.Button("Generate News", variant="primary")

            with gr.Column(scale=1):
                doc_upload = gr.File(
                    label="Upload Document",
                    file_types=[".pdf", ".docx", ".xlsx", ".csv"],
                )
                audio_upload = gr.File(
                    label="Upload Audio",
                    file_types=["audio", "video"],
                )

        output = gr.Textbox(label="Generated News", lines=10, interactive=False)

        def process_inputs(
            main_input: str,
            document: Optional[str],
            audio: Optional[str],
            max_len: int,
        ):
            try:
                # Extract text from the uploaded document and audio, if any
                doc_content = read_document(document) if document else ""
                audio_content = generator.transcribe_audio(audio) if audio else ""

                # Build the prompt passed to the summarizer
                full_input = "\n".join([
                    main_input,
                    f"Document: {doc_content}",
                    f"Audio: {audio_content}",
                ])

                return generator.generate_news(full_input, max_len)
            except Exception as e:
                logger.error(f"Processing error: {str(e)}")
                return f"Error: {str(e)}"

        generate_btn.click(
            fn=process_inputs,
            inputs=[inputs, doc_upload, audio_upload, max_length],
            outputs=output,
        )

    return app


if __name__ == "__main__":
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )
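

# ---------------------------------------------------------------------------
# Environment notes (a sketch, not part of the original script; package names
# are assumptions inferred from the imports above):
#   pip install gradio torch transformers openai-whisper pymupdf python-docx \
#               pandas openpyxl
# Whisper also needs ffmpeg available on the PATH to decode audio files.
# Launch this file with the Python interpreter and open http://localhost:7860
# (the port configured in app.launch above).
# ---------------------------------------------------------------------------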