import gradio as gr
import logging
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
import whisper
from huggingface_hub import login
from pydub import AudioSegment
import requests
from bs4 import BeautifulSoup
from typing import Optional, Dict, Any
import fitz  # PyMuPDF
import os

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Get Hugging Face token
HUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
if not HUGGINGFACE_TOKEN:
    logger.warning("HUGGINGFACE_TOKEN not found in environment variables")
    raise ValueError("Please configure HUGGINGFACE_TOKEN in environment variables")

# Hugging Face authentication
login(token=HUGGINGFACE_TOKEN)

class NewsGenerator:
    def __init__(self):
        # Optimize for CPU
        self.device = "cpu"
        torch.set_grad_enabled(False)
        
        # Clear CUDA memory if exists
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            
        self.whisper_model = None
        self.llm_model = None
        self.tokenizer = None
        
        logger.info(f"Using device: {self.device.upper()}")
        self._load_models()

    def _load_models(self):
        """Optimized model loading for CPU with memory management"""
        try:
            # DeepSeek model configuration
            model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
            
            # Load tokenizer with optimizations
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                use_fast=True,
                token=HUGGINGFACE_TOKEN
            )
            
            # Load model with memory optimizations
            self.llm_model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="cpu",
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True,
                token=HUGGINGFACE_TOKEN
            ).eval()

            # Load Whisper with minimal footprint
            self.whisper_model = whisper.load_model(
                "tiny.en",
                device=self.device
            )

            logger.info("Models loaded successfully")

        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def transcribe_audio(self, audio_path: str) -> str:
        """Audio transcription with error handling"""
        try:
            result = self.whisper_model.transcribe(audio_path)
            return result.get("text", "")
        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return ""

    def generate_news(self, prompt: str, max_length: int = 512) -> str:
        """News generation with DeepSeek"""
        try:
            formatted_prompt = (
                f"<|System|>\nYou are a professional journalist. Generate a news article "
                f"based on this data:\n{prompt}\n<|End|>\n"
                f"<|User|>\nWrite the article:<|End|>\n<|Assistant|>"
            )
            
            inputs = self.tokenizer(
                formatted_prompt,
                return_tensors="pt",
                truncation=True,
                max_length=1024
            ).to(self.device)

            with torch.inference_mode():
                outputs = self.llm_model.generate(
                    **inputs,
                    max_new_tokens=max_length,
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )
            
            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            
        except Exception as e:
            logger.error(f"Generation error: {str(e)}")
            return "Error generating the article"

# Initialize generator at module level
generator = NewsGenerator()

# Create the demo interface
demo = gr.Blocks(
    title="AI News Generator",
    theme=gr.themes.Soft(),
    css="footer {display: none !important}"
)

with demo:
    gr.Markdown("""
    # 📰 AI News Generator
    Transform raw data into professional news articles using AI.
    """)
    
    with gr.Row():
        with gr.Column(scale=3):
            main_input = gr.Textbox(
                label="Main Topic",
                placeholder="Enter the main topic...",
                lines=3
            )
            additional_data = gr.Textbox(
                label="Additional Data",
                placeholder="Key facts...",
                lines=3
            )
            
            with gr.Accordion("Additional Sources", open=False):
                doc_upload = gr.File(
                    label="Upload Document",
                    file_types=[".pdf", ".docx", ".xlsx", ".csv"]
                )
                audio_upload = gr.File(
                    label="Upload Audio/Video",
                    file_types=["audio", "video"]
                )
            
            length_slider = gr.Slider(
                100, 1000,
                value=400,
                label="Article Length (words)"
            )
            tone_select = gr.Dropdown(
                label="Journalistic Tone",
                choices=["Formal", "Neutral", "Investigative", "Narrative"],
                value="Neutral"
            )
            
        with gr.Column(scale=2):
            output_news = gr.Textbox(
                label="Generated Article",
                lines=18,
                interactive=False
            )
            generate_btn = gr.Button("Generate Article", variant="primary")
            status = gr.Textbox(label="Status", interactive=False)

    def process_and_generate(
        main_input: str,
        additional_data: str,
        document: Optional[str],
        audio: Optional[str],
        length: int,
        tone: str
    ):
        try:
            # Process documents if provided
            doc_content = ""
            if document:
                doc_content = read_document(document)
            
            # Process audio if provided
            audio_content = ""
            if audio:
                audio_content = generator.transcribe_audio(audio)

            # Build prompt
            prompt = f"""
            Topic: {main_input}
            Data: {additional_data}
            Tone: {tone}
            Document Content: {doc_content[:500]}
            Audio Content: {audio_content[:300]}
            Length: {length} words
            """

            return generator.generate_news(prompt, length), "✅ Generation successful"
            
        except Exception as e:
            logger.error(str(e))
            return f"Error: {str(e)}", "❌ Generation error"

    generate_btn.click(
        fn=process_and_generate,
        inputs=[
            main_input,
            additional_data,
            doc_upload,
            audio_upload,
            length_slider,
            tone_select
        ],
        outputs=[output_news, status]
    )

if __name__ == "__main__":
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860
    )