Spaces:

CamiloVega
/

News_AI

Runtime error

App Files Files Community

CamiloVega committed 4 days ago

Commit

393aa1f

verified ·

1 Parent(s): fadd658

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -192

app.py CHANGED Viewed

@@ -1,235 +1,150 @@
 import gradio as gr
 import logging
-import os
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import whisper
 from pydub import AudioSegment
 import requests
 from bs4 import BeautifulSoup
-from typing import Optional, Dict, Any
-from dataclasses import dataclass
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(levelname)s) %(message)s'
 )
 logger = logging.getLogger(__name__)
-@dataclass
-class NewsConfig:
-    model_name: str = "meta-llama/Llama-2-3b-chat-hf"
-    max_tokens: int = 256
-    temperature: float = 0.7
-    top_p: float = 0.95
 class NewsGenerator:
     def __init__(self):
-        self.config = NewsConfig()
-        self.tokenizer = None
-        self.model = None
         self.whisper_model = None
-        self._initialize_models()
-    def _initialize_models(self):
-        """Initialize models with efficient settings"""
         try:
-            if not self.tokenizer:
-                self.tokenizer = AutoTokenizer.from_pretrained(
-                    self.config.model_name,
-                    use_fast=True,
-                    model_max_length=self.config.max_tokens
-                )
-                self.tokenizer.pad_token = self.tokenizer.eos_token
-            if not self.model:
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    self.config.model_name,
-                    device_map="auto",
-                    torch_dtype=torch.float16,
-                    low_cpu_mem_usage=True,
-                    use_safetensors=True
-                )
-            if not self.whisper_model:
-                self.whisper_model = whisper.load_model(
-                    "tiny",
-                    device="cuda" if torch.cuda.is_available() else "cpu"
-                )
         except Exception as e:
-            logger.error(f"Error initializing models: {str(e)}")
             raise
-    def transcribe_audio(self, audio_file: str) -> str:
-        """Transcribe audio file with improved error handling"""
         try:
-            if not audio_file:
-                return "Error: No audio file provided"
-            result = self.whisper_model.transcribe(audio_file)
-            return result.get("text", "Transcription failed")
         except Exception as e:
-            logger.error(f"Audio transcription error: {str(e)}")
-            return f"Error transcribing audio: {str(e)}"
-    def generate_news(self, prompt: str) -> str:
-        """Generate news article with optimized parameters"""
         try:
-            with torch.inference_mode():
-                outputs = self.model.generate(
-                    inputs=self.tokenizer(prompt, return_tensors="pt").input_ids,
-                    max_new_tokens=self.config.max_tokens,
-                    temperature=self.config.temperature,
-                    top_p=self.config.top_p,
-                    do_sample=True,
-                    early_stopping=True
-                )
-                return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         except Exception as e:
-            logger.error(f"News generation error: {str(e)}")
-            return f"Error generating news: {str(e)}"
-def read_document(document_path: str) -> str:
-    """Read document content with better error handling"""
     try:
-        if document_path.endswith(".pdf"):
-            with fitz.open(document_path) as doc:
-                return "\n".join(page.get_text() for page in doc)
-        elif document_path.endswith((".docx", ".xlsx", ".csv")):
-            content = ""
-            if document_path.endswith(".docx"):
-                import docx
-                doc = docx.Document(document_path)
-                content = "\n".join(p.text for p in doc.paragraphs)
-            elif document_path.endswith(".xlsx"):
-                import pandas as pd
-                content = pd.read_excel(document_path).to_string()
-            elif document_path.endswith(".csv"):
-                import pandas as pd
-                content = pd.read_csv(document_path).to_string()
-            return content
-        return "Unsupported file type"
     except Exception as e:
-        logger.error(f"Document reading error: {str(e)}")
-        return f"Error reading document: {str(e)}"
-def read_url(url: str) -> str:
-    """Read URL content with better handling"""
-    try:
-        response = requests.get(url, timeout=10)
-        response.raise_for_status()
-        return BeautifulSoup(response.content, 'html.parser').get_text()
-    except Exception as e:
-        logger.error(f"URL reading error: {str(e)}")
-        return f"Error reading URL: {str(e)}"
-def process_social_media(url: str) -> Dict[str, Any]:
-    """Process social media content with improved handling"""
-    try:
-        text = read_url(url)
-        return {"text": text, "video": None}
-    except Exception as e:
-        logger.error(f"Social media processing error: {str(e)}")
-        return {"text": None, "video": None}
-def main():
-    """Main function to create and run the Gradio app"""
-    news_generator = NewsGenerator()
-    with gr.Blocks() as demo:
-        gr.Markdown("# Generador de Noticias Optimizado")
-        with gr.Row():
-            instrucciones = gr.Textbox(label="Instrucciones", lines=2)
-            hechos = gr.Textbox(label="Hechos", lines=4)
-            tamaño = gr.Number(label="Tamaño (palabras)", value=100)
-            tono = gr.Dropdown(label="Tono", choices=["serio", "neutral", "divertido"], value="neutral")
         with gr.Row():
-            documento = gr.File(label="Documento", file_types=["pdf", "docx", "xlsx", "csv"])
-            audio = gr.File(label="Audio/Video", file_types=["audio", "video"])
-            url = gr.Textbox(label="URL")
-            social_url = gr.Textbox(label="URL de red social")
-        with gr.Row():
-            generar = gr.Button("Generar Noticia")
-            noticia = gr.Textbox(label="Noticia Generada", lines=20)
-            transcripciones = gr.Textbox(label="Transcripciones", lines=10)
-        def generate_news_output(
-            instrucciones: str,
-            hechos: str,
-            tamaño: int,
-            tono: str,
-            documento: Optional[gr.File],
-            audio: Optional[gr.File],
-            url: Optional[str],
-            social_url: Optional[str]
         ):
             try:
-                # Process document
-                if documento:
-                    doc_content = read_document(documento.name)
-                else:
-                    doc_content = ""
-                # Process audio
-                if audio:
-                    audio_content = news_generator.transcribe_audio(audio.name)
-                else:
-                    audio_content = ""
-                # Process URL
-                if url:
-                    url_content = read_url(url)
-                else:
-                    url_content = ""
-                # Process social media
-                if social_url:
-                    social_content = process_social_media(social_url)
-                else:
-                    social_content = {"text": "", "video": ""}
-                # Generate prompt
-                prompt = f"""[INST] Escribe una noticia basada en la siguiente información:
-                Instrucciones: {instrucciones}
-                Hechos: {hechos}
-                Documento: {doc_content}
-                Audio: {audio_content}
-                URL: {url_content}
-                Red Social: {social_content['text']}
-                Video: {social_content['video'] if social_content else ''}
-                Parámetros:
-                - Tamaño: {tamaño} palabras
-                - Tono: {tono}
-                - Incluye: Título, gancho, cuerpo, 5W
-                - Estilo periodístico
-                [/INST]"""
-                # Generate news
-                news = news_generator.generate_news(prompt)
-                return news, f"Transcripciones generadas correctamente"
             except Exception as e:
-                return f"Error generando noticia: {str(e)}", f"Error: {str(e)}"
-        generate_news_output(
-            instrucciones,
-            hechos,
-            tamaño,
-            tono,
-            documento,
-            audio,
-            url,
-            social_url
-        )(generar, [noticia, transcripciones])
-    if __name__ == "__main__":
-        demo.launch()
 if __name__ == "__main__":
-    main()

 import gradio as gr
 import logging
 import torch
+from transformers import pipeline, AutoTokenizer
 import whisper
 from pydub import AudioSegment
 import requests
 from bs4 import BeautifulSoup
+from typing import Optional
+# Basic logging configuration: INFO level, "timestamp - level - message" lines.
 logging.basicConfig(
     level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 class NewsGenerator:
+    """Bundle a text summarization pipeline with a Whisper transcription model.
+
+    Both models are loaded eagerly when an instance is constructed.
+    """
+
     def __init__(self):
+        # Run on the GPU when torch reports one, otherwise on the CPU.
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.whisper_model = None
+        self.news_pipeline = None
+        self.tokenizer = None
+        # Models are loaded right away; a loading failure propagates.
+        self._load_models()
+
+    def _load_models(self):
+        """Load the tokenizer, the summarization pipeline and the Whisper model.
+
+        Raises:
+            Exception: Any loading error is logged and re-raised unchanged.
+        """
         try:
+            model_name = "facebook/bart-large-cnn"
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self.news_pipeline = pipeline(
+                "summarization",
+                model=model_name,
+                # Reuse the tokenizer loaded above instead of loading it twice.
+                tokenizer=self.tokenizer,
+                device=self.device,
+                # Half precision on the GPU, full precision on the CPU.
+                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
+            )
+            # Use a multilingual checkpoint on both devices: the ".en" Whisper
+            # checkpoints are English-only, and this app's interface is Spanish.
+            self.whisper_model = whisper.load_model(
+                "tiny" if self.device == "cpu" else "small",
+                device=self.device
+            )
         except Exception as e:
+            # logger.exception keeps the traceback alongside the message.
+            logger.exception("Error loading models: %s", e)
             raise
+
+    def transcribe_audio(self, audio_path: str) -> str:
+        """Transcribe an audio file with the loaded Whisper model.
+
+        Args:
+            audio_path: Path of the audio file to transcribe.
+
+        Returns:
+            The transcribed text, or an empty string when the result has no
+            "text" entry or transcription fails (the failure is logged).
+        """
         try:
+            result = self.whisper_model.transcribe(audio_path)
+            return result.get("text", "")
         except Exception as e:
+            logger.exception("Transcription error: %s", e)
+            return ""
+
+    def generate_news(self, inputs: str, max_length: int = 200) -> str:
+        """Summarize ``inputs`` with the summarization pipeline.
+
+        Args:
+            inputs: Text to summarize.
+            max_length: Upper bound on the generated length; coerced to int.
+
+        Returns:
+            The ``summary_text`` of the first pipeline result, or the string
+            "Error generating content" when generation fails.
+        """
         try:
+            # Callers such as UI sliders may pass a float; lengths must be ints.
+            max_length = int(max_length)
+            summaries = self.news_pipeline(
+                inputs,
+                max_length=max_length,
+                # Never request a minimum that exceeds the maximum.
+                min_length=min(30, max_length),
+                do_sample=False,  # sampling disabled
+                truncation=True
+            )
+            return summaries[0]['summary_text']
         except Exception as e:
+            logger.exception("Generation error: %s", e)
+            return "Error generating content"
+def read_document(file_path: str) -> str:
+    """Extract the text of a PDF, DOCX, XLSX or CSV file.
+
+    The file type is chosen from the extension, compared case-insensitively
+    so that names such as ``REPORT.PDF`` or ``DATA.CSV`` are handled too.
+
+    Args:
+        file_path: Path of the document to read.
+
+    Returns:
+        The extracted text, or an empty string when the extension is not
+        supported or reading fails (the failure is logged).
+    """
     try:
+        # Lower-case once so upper- or mixed-case extensions still match.
+        suffix = file_path.lower()
+        # Parsers are imported inside their branch so each dependency is only
+        # needed when a file of that type is actually read.
+        if suffix.endswith(".pdf"):
+            import fitz
+            with fitz.open(file_path) as doc:
+                return " ".join(page.get_text() for page in doc)
+        if suffix.endswith(".docx"):
+            from docx import Document
+            return " ".join(p.text for p in Document(file_path).paragraphs)
+        if suffix.endswith(".xlsx"):
+            import pandas as pd
+            return pd.read_excel(file_path).to_string()
+        if suffix.endswith(".csv"):
+            import pandas as pd
+            return pd.read_csv(file_path).to_string()
+        return ""
     except Exception as e:
+        # logger.exception keeps the traceback alongside the message.
+        logger.exception("Document error: %s", e)
+        return ""
+def create_interface():
+    """Build the Gradio Blocks interface and return it without launching it.
+
+    A NewsGenerator is constructed first, which loads its models, so this
+    call fails if model loading fails.
+    """
+    generator = NewsGenerator()
+    with gr.Blocks(title="Generador de Noticias Eficiente") as app:
+        gr.Markdown("## 📰 Generador de Noticias Optimizado")
         with gr.Row():
+            # Left column: free-text input, length slider and the trigger button.
+            with gr.Column(scale=2):
+                inputs = gr.Textbox(label="Entrada Principal", lines=5)
+                max_length = gr.Slider(100, 500, value=200, label="Longitud Máxima")
+                generate_btn = gr.Button("Generar Noticia", variant="primary")
+            # Right column: optional document and audio/video uploads.
+            with gr.Column(scale=1):
+                doc_upload = gr.File(label="Subir Documento", file_types=[".pdf", ".docx", ".xlsx", ".csv"])
+                audio_upload = gr.File(label="Subir Audio", file_types=["audio", "video"])
+        # Read-only box that receives the generated text.
+        output = gr.Textbox(label="Noticia Generada", lines=10, interactive=False)
+        def process_inputs(
+            main_input: str,
+            document: Optional[str],
+            audio: Optional[str],
+            max_len: int
         ):
+            """Combine the text, document and audio inputs and summarize them.
+
+            Returns the generated text, or an "Error: ..." string if anything
+            in the handler raises.
+            """
             try:
+                # Process the document and the audio; a missing upload yields "".
+                # NOTE(review): assumes the File components deliver path
+                # strings — confirm against the installed Gradio version.
+                doc_content = read_document(document) if document else ""
+                audio_content = generator.transcribe_audio(audio) if audio else ""
+                # Build the text handed to the summarizer.
+                # NOTE(review): the "Documento:" and "Audio:" lines are added
+                # even when their content is empty — confirm this is intended.
+                full_input = "\n".join([
+                    main_input,
+                    f"Documento: {doc_content}",
+                    f"Audio: {audio_content}"
+                ])
+                return generator.generate_news(full_input, max_len)
             except Exception as e:
+                logger.error(f"Processing error: {str(e)}")
+                return f"Error: {str(e)}"
+        # Run process_inputs on click; inputs are passed in the listed order.
+        generate_btn.click(
+            fn=process_inputs,
+            inputs=[inputs, doc_upload, audio_upload, max_length],
+            outputs=output
+        )
+    return app
+# Script entry point: build the interface and serve it.
 if __name__ == "__main__":
+    app = create_interface()
+    app.launch(
+        # Bind to all network interfaces on port 7860, without a share link.
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True
+    )