import logging
import os
from typing import Optional, Dict, Any

import fitz  # PyMuPDF
import gradio as gr
import numpy as np
import requests
import torch
import whisper
from bs4 import BeautifulSoup
from huggingface_hub import login
from pydub import AudioSegment
from transformers import AutoTokenizer, AutoModelForCausalLM

# Logging configuration: INFO level, timestamped records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Hugging Face authentication. The token is read from the HF_TOKEN
# environment variable so that a real credential never has to be committed
# to source control; the placeholder is only used when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "hf_tu_token_aqui")
login(token=HF_TOKEN)

class NewsGenerator:
    """Bundle the Whisper transcription model and the Llama-2 chat model.

    Both models are loaded when the instance is created, so construction
    raises if either of them cannot be loaded.
    """

    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.whisper_model = None
        self.llm_model = None
        self.tokenizer = None

        self._load_models()

    def _load_models(self):
        """Load the tokenizer, the 4-bit quantized LLM and the Whisper model.

        Raises:
            Exception: any error raised by the underlying loaders is logged
                and then re-raised unchanged.
        """
        try:
            # Llama-2 7B Chat model
            model_name = "meta-llama/Llama-2-7b-chat-hf"
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                use_fast=True,
                token=HF_TOKEN
            )

            self.llm_model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                torch_dtype=torch.float16,
                load_in_4bit=True,
                low_cpu_mem_usage=True,
                token=HF_TOKEN
            )

            # Whisper configuration: a smaller checkpoint is used on CPU.
            # NOTE(review): the ".en" Whisper checkpoints are English-only,
            # while this app's prompts are Spanish - confirm "small.en" is
            # really the intended CPU model.
            self.whisper_model = whisper.load_model(
                "small.en" if self.device == "cpu" else "medium",
                device=self.device
            )

        except Exception as e:
            logger.error(f"Error cargando modelos: {str(e)}")
            raise

    def transcribe_audio(self, audio_path: str) -> str:
        """Transcribe the audio file at *audio_path* with Whisper.

        Returns:
            The transcribed text, or an empty string if transcription fails
            (the error is logged, not raised).
        """
        try:
            result = self.whisper_model.transcribe(audio_path)
            return result.get("text", "")
        except Exception as e:
            logger.error(f"Error en transcripción: {str(e)}")
            return ""

    def generate_news(self, prompt: str, max_length: int = 512) -> str:
        """Generate a news article from *prompt* with the Llama-2 chat model.

        Args:
            prompt: Facts and instructions the article must be based on.
            max_length: Maximum number of new tokens to generate. Non-integer
                numbers (for example a float coming from a UI slider) are
                truncated to ``int``.

        Returns:
            Only the newly generated text, without the echoed prompt, or the
            string "Error en generación" if generation fails (the error is
            logged, not raised).
        """
        try:
            inputs = self.tokenizer(
                f"[INST]<<SYS>>Eres un periodista profesional. Genera una noticia bien estructurada basada en los siguientes datos:<</SYS>>\n{prompt}[/INST]",
                return_tensors="pt"
            ).to(self.device)

            outputs = self.llm_model.generate(
                **inputs,
                max_new_tokens=int(max_length),
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

            # The returned sequence starts with the prompt tokens; drop them
            # so the caller gets the article only, not the instructions and
            # source material that were fed in.
            prompt_len = inputs["input_ids"].shape[-1]
            new_tokens = outputs[0][prompt_len:]
            return self.tokenizer.decode(
                new_tokens, skip_special_tokens=True
            ).strip()

        except Exception as e:
            logger.error(f"Error generando noticia: {str(e)}")
            return "Error en generación"

def read_document(file_path: str) -> str:
    """Extract the text content of a PDF, DOCX, XLSX or CSV file.

    The format is chosen from the file extension, compared
    case-insensitively so that names such as ``REPORT.PDF`` are handled too.

    Args:
        file_path: Path of the document to read.

    Returns:
        The extracted text. An empty string is returned for unsupported
        extensions and when reading fails (the error is logged, not raised).
    """
    try:
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            with fitz.open(file_path) as doc:
                return " ".join(page.get_text() for page in doc)
        elif lowered.endswith(".docx"):
            # Imported lazily: only needed when a Word document is read.
            from docx import Document
            return " ".join(p.text for p in Document(file_path).paragraphs)
        elif lowered.endswith(".xlsx"):
            import pandas as pd
            return pd.read_excel(file_path).to_string()
        elif lowered.endswith(".csv"):
            import pandas as pd
            return pd.read_csv(file_path).to_string()
        return ""
    except Exception as e:
        logger.error(f"Error leyendo documento: {str(e)}")
        return ""

def read_url(url: str) -> str:
    """Download *url* and return the text content of the HTML page.

    The request uses a 15-second timeout. On any error (failed request,
    HTTP error status, parsing problem) the error is logged and an empty
    string is returned instead of raising.
    """
    try:
        page = requests.get(url, timeout=15)
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')
        extracted = soup.get_text(separator=' ', strip=True)
    except Exception as e:
        logger.error(f"Error leyendo URL: {str(e)}")
        return ""
    return extracted

def process_social_media(url: str) -> Dict[str, Any]:
    """Fetch the text of a social-media page.

    Returns:
        A dict with the page text under "text" and ``None`` under "video".
        If fetching raises, the error is logged and "text" is left empty.
    """
    payload: Dict[str, Any] = {"text": "", "video": None}
    try:
        payload["text"] = read_url(url)
    except Exception as e:
        logger.error(f"Error procesando red social: {str(e)}")
    return payload

def _uploaded_path(upload: Any) -> Optional[str]:
    """Return the filesystem path of a Gradio upload, or None if absent."""
    if upload is None:
        return None
    if isinstance(upload, str):
        return upload
    # Some Gradio versions/configurations hand over a temp-file object
    # whose ``name`` attribute holds the path instead of a plain string.
    return getattr(upload, "name", None)


def create_interface():
    """Build the Gradio user interface for the news generator.

    A ``NewsGenerator`` is created first, so calling this function loads
    the models before any UI component is built.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    generator = NewsGenerator()

    with gr.Blocks(title="Generador de Noticias AI", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 📰 Generador de Noticias Profesional")

        with gr.Row():
            # Left column: all user inputs.
            with gr.Column(scale=3):
                main_input = gr.Textbox(
                    label="Tema principal",
                    placeholder="Ingrese el tema o instrucciones principales...",
                    lines=3
                )
                additional_data = gr.Textbox(
                    label="Datos adicionales",
                    placeholder="Hechos clave, nombres, fechas, etc...",
                    lines=3
                )

                with gr.Accordion("Fuentes adicionales", open=False):
                    doc_upload = gr.File(
                        label="Subir documento",
                        file_types=[".pdf", ".docx", ".xlsx", ".csv"]
                    )
                    audio_upload = gr.File(
                        label="Subir audio/video",
                        file_types=["audio", "video"]
                    )
                    url_input = gr.Textbox(
                        label="URL de referencia",
                        placeholder="https://..."
                    )
                    social_input = gr.Textbox(
                        label="URL de red social",
                        placeholder="https://..."
                    )

                length_slider = gr.Slider(
                    100, 1000, value=400,
                    label="Longitud de la noticia (palabras)"
                )
                tone_select = gr.Dropdown(
                    label="Tono periodístico",
                    choices=["Formal", "Neutral", "Investigativo", "Narrativo"],
                    value="Neutral"
                )

            # Right column: generated article, trigger button and status.
            with gr.Column(scale=2):
                output_news = gr.Textbox(
                    label="Noticia generada",
                    lines=18,
                    interactive=False
                )
                generate_btn = gr.Button("Generar Noticia", variant="primary")
                status = gr.Textbox(label="Estado", interactive=False)

        def process_and_generate(
            main_input: str,
            additional_data: str,
            document: Any,
            audio: Any,
            url: Optional[str],
            social_url: Optional[str],
            length: int,
            tone: str
        ):
            """Collect every source, build the prompt and generate the article.

            Returns:
                A ``(article_text, status_message)`` tuple for the two
                output components.
            """
            try:
                # Process the optional sources; absent ones become empty text.
                doc_path = _uploaded_path(document)
                audio_path = _uploaded_path(audio)
                doc_content = read_document(doc_path) if doc_path else ""
                audio_content = generator.transcribe_audio(audio_path) if audio_path else ""
                url_content = read_url(url) if url else ""
                social_content = process_social_media(social_url) if social_url else {"text": ""}

                # Build the structured prompt; each source is truncated so
                # the prompt stays bounded.
                prompt = f"""
                ## Instrucciones:
                - Tema principal: {main_input}
                - Datos proporcionados: {additional_data}
                - Tono requerido: {tone}
                
                ## Fuentes:
                - Documento: {doc_content[:1000]}...
                - Audio: {audio_content[:500]}...
                - URL: {url_content[:1000]}...
                - Red social: {social_content['text'][:500]}...
                
                ## Requisitos:
                - Estructura profesional (titular, lead, cuerpo)
                - Incluir las 5W
                - Citas relevantes si aplica
                - Longitud: {length} palabras
                """

                # NOTE(review): the slider is labelled in words, but the
                # value is forwarded as generate_news' max_length - confirm
                # the unit that function expects.
                # The slider may deliver a float, hence the int() cast.
                return generator.generate_news(prompt, int(length)), "✅ Generación exitosa"

            except Exception as e:
                logger.error(str(e))
                return f"Error: {str(e)}", "❌ Error en generación"

        generate_btn.click(
            fn=process_and_generate,
            inputs=[
                main_input,
                additional_data,
                doc_upload,
                audio_upload,
                url_input,
                social_input,
                length_slider,
                tone_select
            ],
            outputs=[output_news, status]
        )

    return app

if __name__ == "__main__":
    # Script entry point: build the interface and serve it on 0.0.0.0:7860
    # with sharing disabled and error display enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    app = create_interface()
    app.launch(**launch_options)