CamiloVega committed on
Commit
07cea38
·
verified ·
1 Parent(s): aeb217c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -73
app.py CHANGED
@@ -12,25 +12,25 @@ from typing import Optional, Dict, Any
12
  import fitz # PyMuPDF
13
  import os
14
 
15
- # Logging configuration
16
  logging.basicConfig(
17
  level=logging.INFO,
18
  format='%(asctime)s - %(levelname)s - %(message)s'
19
  )
20
  logger = logging.getLogger(__name__)
21
 
22
- # Get Hugging Face token from environment variable
23
  HUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
24
  if not HUGGINGFACE_TOKEN:
25
- logger.warning("HUGGINGFACE_TOKEN not found in environment variables")
26
- raise ValueError("HUGGINGFACE_TOKEN is not configured in environment variables")
27
 
28
- # Hugging Face Authentication
29
  login(token=HUGGINGFACE_TOKEN)
30
 
31
  class NewsGenerator:
32
  def __init__(self):
33
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
34
  self.whisper_model = None
35
  self.llm_model = None
36
  self.tokenizer = None
@@ -38,10 +38,10 @@ class NewsGenerator:
38
  self._load_models()
39
 
40
  def _load_models(self):
41
- """Optimized model loading with 4-bit quantization"""
42
  try:
43
- # Llama-2 7B Chat Model
44
- model_name = "meta-llama/Llama-2-7b-chat-hf"
45
  self.tokenizer = AutoTokenizer.from_pretrained(
46
  model_name,
47
  use_fast=True,
@@ -50,37 +50,43 @@ class NewsGenerator:
50
 
51
  self.llm_model = AutoModelForCausalLM.from_pretrained(
52
  model_name,
53
- device_map="auto",
54
- torch_dtype=torch.float16,
55
- load_in_4bit=True,
56
  low_cpu_mem_usage=True,
57
  token=HUGGINGFACE_TOKEN
58
  )
59
 
60
- # Whisper Configuration
61
  self.whisper_model = whisper.load_model(
62
- "small.en" if self.device == "cpu" else "medium",
63
- device=self.device
64
  )
65
 
66
  except Exception as e:
67
- logger.error(f"Error loading models: {str(e)}")
68
  raise
69
 
70
  def transcribe_audio(self, audio_path: str) -> str:
71
- """Audio transcription with error handling"""
72
  try:
73
  result = self.whisper_model.transcribe(audio_path)
74
  return result.get("text", "")
75
  except Exception as e:
76
- logger.error(f"Transcription error: {str(e)}")
77
  return ""
78
 
79
  def generate_news(self, prompt: str, max_length: int = 512) -> str:
80
- """News generation with Llama-2"""
81
  try:
 
 
 
 
 
 
 
82
  inputs = self.tokenizer(
83
- f"[INST]<<SYS>>You are a professional journalist. Generate a well-structured news article based on the following data:<</SYS>>\n{prompt}[/INST]",
84
  return_tensors="pt"
85
  ).to(self.device)
86
 
@@ -96,11 +102,11 @@ class NewsGenerator:
96
  return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
97
 
98
  except Exception as e:
99
- logger.error(f"Generation error: {str(e)}")
100
- return "Generation error"
101
 
102
  def read_document(file_path: str) -> str:
103
- """Optimized document reading"""
104
  try:
105
  if file_path.endswith(".pdf"):
106
  with fitz.open(file_path) as doc:
@@ -116,99 +122,98 @@ def read_document(file_path: str) -> str:
116
  return pd.read_csv(file_path).to_string()
117
  return ""
118
  except Exception as e:
119
- logger.error(f"Document reading error: {str(e)}")
120
  return ""
121
 
122
  def read_url(url: str) -> str:
123
- """Web content extraction"""
124
  try:
125
  response = requests.get(url, timeout=15)
126
  response.raise_for_status()
127
  return BeautifulSoup(response.content, 'html.parser').get_text(separator=' ', strip=True)
128
  except Exception as e:
129
- logger.error(f"URL reading error: {str(e)}")
130
  return ""
131
 
132
  def process_social_media(url: str) -> Dict[str, Any]:
133
- """Social media content processing"""
134
  try:
135
  text = read_url(url)
136
  return {"text": text, "video": None}
137
  except Exception as e:
138
- logger.error(f"Social media processing error: {str(e)}")
139
  return {"text": "", "video": None}
140
 
141
  def create_interface():
142
- """Gradio user interface"""
143
  generator = NewsGenerator()
144
 
145
- with gr.Blocks(title="AI News Generator", theme=gr.themes.Soft()) as app:
146
  gr.Markdown("""
147
- # 📰 AI News Generator
148
 
149
- Transform raw data into professionally written news articles using advanced AI. This tool combines
150
- multiple sources including text, documents, audio, and web content to generate comprehensive news stories.
151
 
152
- ### Features:
153
- - Multi-source input processing (text, documents, audio, web content)
154
- - Professional journalistic writing styles
155
- - Automatic audio transcription
156
- - Customizable article length and tone
157
 
158
  ---
159
- Developed by Camilo Vega, AI Consultant
160
- [LinkedIn Profile](https://www.linkedin.com/in/camilo-vega-169084b1/)
161
  """)
162
 
163
  with gr.Row():
164
  with gr.Column(scale=3):
165
  main_input = gr.Textbox(
166
- label="Main Topic",
167
- placeholder="Enter the main topic or instructions...",
168
  lines=3
169
  )
170
  additional_data = gr.Textbox(
171
- label="Additional Data",
172
- placeholder="Key facts, names, dates, etc...",
173
  lines=3
174
  )
175
 
176
- with gr.Accordion("Additional Sources", open=False):
177
  doc_upload = gr.File(
178
- label="Upload Document",
179
  file_types=[".pdf", ".docx", ".xlsx", ".csv"]
180
  )
181
  audio_upload = gr.File(
182
- label="Upload Audio/Video",
183
  file_types=["audio", "video"]
184
  )
185
  url_input = gr.Textbox(
186
- label="Reference URL",
187
  placeholder="https://..."
188
  )
189
  social_input = gr.Textbox(
190
- label="Social Media URL",
191
  placeholder="https://..."
192
  )
193
 
194
  length_slider = gr.Slider(
195
  100, 1000, value=400,
196
- label="Article Length (words)"
197
  )
198
  tone_select = gr.Dropdown(
199
- label="Journalistic Tone",
200
- choices=["Formal", "Neutral", "Investigative", "Narrative"],
201
  value="Neutral"
202
  )
203
 
204
  with gr.Column(scale=2):
205
  output_news = gr.Textbox(
206
- label="Generated Article",
207
  lines=18,
208
  interactive=False
209
  )
210
- generate_btn = gr.Button("Generate Article", variant="primary")
211
- status = gr.Textbox(label="Status", interactive=False)
212
 
213
  def process_and_generate(
214
  main_input: str,
@@ -221,37 +226,37 @@ def create_interface():
221
  tone: str
222
  ):
223
  try:
224
- # Process additional sources
225
  doc_content = read_document(document) if document else ""
226
  audio_content = generator.transcribe_audio(audio) if audio else ""
227
  url_content = read_url(url) if url else ""
228
  social_content = process_social_media(social_url) if social_url else {"text": ""}
229
 
230
- # Build structured prompt
231
  prompt = f"""
232
- ## Instructions:
233
- - Main Topic: {main_input}
234
- - Provided Data: {additional_data}
235
- - Required Tone: {tone}
236
 
237
- ## Sources:
238
- - Document: {doc_content[:1000]}...
239
- - Audio: {audio_content[:500]}...
240
- - URL: {url_content[:1000]}...
241
- - Social Media: {social_content['text'][:500]}...
242
 
243
- ## Requirements:
244
- - Professional structure (headline, lead, body)
245
- - Include the 5W's
246
- - Relevant quotes if applicable
247
- - Length: {length} words
248
  """
249
 
250
- return generator.generate_news(prompt, length), "✅ Generation successful"
251
 
252
  except Exception as e:
253
  logger.error(str(e))
254
- return f"Error: {str(e)}", "❌ Generation error"
255
 
256
  generate_btn.click(
257
  fn=process_and_generate,
 
12
  import fitz # PyMuPDF
13
  import os
14
 
15
+ # Configuración de logging
16
  logging.basicConfig(
17
  level=logging.INFO,
18
  format='%(asctime)s - %(levelname)s - %(message)s'
19
  )
20
  logger = logging.getLogger(__name__)
21
 
22
+ # Obtener token de Hugging Face
23
  HUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
24
  if not HUGGINGFACE_TOKEN:
25
+ logger.warning("HUGGINGFACE_TOKEN no encontrado en variables de entorno")
26
+ raise ValueError("Configura HUGGINGFACE_TOKEN en las variables de entorno")
27
 
28
+ # Autenticación en Hugging Face
29
  login(token=HUGGINGFACE_TOKEN)
30
 
31
  class NewsGenerator:
32
  def __init__(self):
33
+ self.device = "cpu" # Forzar uso de CPU
34
  self.whisper_model = None
35
  self.llm_model = None
36
  self.tokenizer = None
 
38
  self._load_models()
39
 
40
  def _load_models(self):
41
+ """Carga optimizada de modelos para CPU"""
42
  try:
43
+ # Modelo DeepSeek ligero
44
+ model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
45
  self.tokenizer = AutoTokenizer.from_pretrained(
46
  model_name,
47
  use_fast=True,
 
50
 
51
  self.llm_model = AutoModelForCausalLM.from_pretrained(
52
  model_name,
53
+ device_map="cpu",
54
+ torch_dtype=torch.float32, # Usar float32 para CPU
 
55
  low_cpu_mem_usage=True,
56
  token=HUGGINGFACE_TOKEN
57
  )
58
 
59
+ # Configuración de Whisper (versión reducida)
60
  self.whisper_model = whisper.load_model(
61
+ "tiny.en",
62
+ device="cpu"
63
  )
64
 
65
  except Exception as e:
66
+ logger.error(f"Error cargando modelos: {str(e)}")
67
  raise
68
 
69
  def transcribe_audio(self, audio_path: str) -> str:
70
+ """Transcripción de audio con manejo de errores"""
71
  try:
72
  result = self.whisper_model.transcribe(audio_path)
73
  return result.get("text", "")
74
  except Exception as e:
75
+ logger.error(f"Error en transcripción: {str(e)}")
76
  return ""
77
 
78
  def generate_news(self, prompt: str, max_length: int = 512) -> str:
79
+ """Generación de noticias con DeepSeek"""
80
  try:
81
+ # Formato de prompt específico para DeepSeek
82
+ formatted_prompt = (
83
+ f"<|System|>\nEres un periodista profesional. Genera un artículo noticioso "
84
+ f"basado en estos datos:\n{prompt}\n<|End|>\n"
85
+ f"<|User|>\nRedacta el artículo:<|End|>\n<|Assistant|>"
86
+ )
87
+
88
  inputs = self.tokenizer(
89
+ formatted_prompt,
90
  return_tensors="pt"
91
  ).to(self.device)
92
 
 
102
  return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
103
 
104
  except Exception as e:
105
+ logger.error(f"Error en generación: {str(e)}")
106
+ return "Error generando el artículo"
107
 
108
  def read_document(file_path: str) -> str:
109
+ """Lectura optimizada de documentos"""
110
  try:
111
  if file_path.endswith(".pdf"):
112
  with fitz.open(file_path) as doc:
 
122
  return pd.read_csv(file_path).to_string()
123
  return ""
124
  except Exception as e:
125
+ logger.error(f"Error leyendo documento: {str(e)}")
126
  return ""
127
 
128
  def read_url(url: str) -> str:
129
+ """Extracción de contenido web"""
130
  try:
131
  response = requests.get(url, timeout=15)
132
  response.raise_for_status()
133
  return BeautifulSoup(response.content, 'html.parser').get_text(separator=' ', strip=True)
134
  except Exception as e:
135
+ logger.error(f"Error leyendo URL: {str(e)}")
136
  return ""
137
 
138
  def process_social_media(url: str) -> Dict[str, Any]:
139
+ """Procesamiento de redes sociales"""
140
  try:
141
  text = read_url(url)
142
  return {"text": text, "video": None}
143
  except Exception as e:
144
+ logger.error(f"Error procesando red social: {str(e)}")
145
  return {"text": "", "video": None}
146
 
147
  def create_interface():
148
+ """Interfaz de usuario de Gradio"""
149
  generator = NewsGenerator()
150
 
151
+ with gr.Blocks(title="Generador de Noticias AI", theme=gr.themes.Soft()) as app:
152
  gr.Markdown("""
153
+ # 📰 Generador de Noticias AI
154
 
155
+ Transforma datos en bruto en artículos periodísticos profesionales usando IA avanzada.
 
156
 
157
+ ### Características:
158
+ - Procesamiento multi-fuente (texto, documentos, audio, web)
159
+ - Estilos periodísticos profesionales
160
+ - Transcripción automática de audio
161
+ - Longitud y tono personalizables
162
 
163
  ---
164
+ Desarrollado por Camilo Vega, Consultor en IA
165
+ [Perfil de LinkedIn](https://www.linkedin.com/in/camilo-vega-169084b1/)
166
  """)
167
 
168
  with gr.Row():
169
  with gr.Column(scale=3):
170
  main_input = gr.Textbox(
171
+ label="Tema Principal",
172
+ placeholder="Ingrese el tema principal o instrucciones...",
173
  lines=3
174
  )
175
  additional_data = gr.Textbox(
176
+ label="Datos Adicionales",
177
+ placeholder="Hechos clave, nombres, fechas...",
178
  lines=3
179
  )
180
 
181
+ with gr.Accordion("Fuentes Adicionales", open=False):
182
  doc_upload = gr.File(
183
+ label="Subir Documento",
184
  file_types=[".pdf", ".docx", ".xlsx", ".csv"]
185
  )
186
  audio_upload = gr.File(
187
+ label="Subir Audio/Video",
188
  file_types=["audio", "video"]
189
  )
190
  url_input = gr.Textbox(
191
+ label="URL de Referencia",
192
  placeholder="https://..."
193
  )
194
  social_input = gr.Textbox(
195
+ label="URL de Red Social",
196
  placeholder="https://..."
197
  )
198
 
199
  length_slider = gr.Slider(
200
  100, 1000, value=400,
201
+ label="Longitud del Artículo (palabras)"
202
  )
203
  tone_select = gr.Dropdown(
204
+ label="Tono Periodístico",
205
+ choices=["Formal", "Neutral", "Investigativo", "Narrativo"],
206
  value="Neutral"
207
  )
208
 
209
  with gr.Column(scale=2):
210
  output_news = gr.Textbox(
211
+ label="Artículo Generado",
212
  lines=18,
213
  interactive=False
214
  )
215
+ generate_btn = gr.Button("Generar Artículo", variant="primary")
216
+ status = gr.Textbox(label="Estado", interactive=False)
217
 
218
  def process_and_generate(
219
  main_input: str,
 
226
  tone: str
227
  ):
228
  try:
229
+ # Procesar fuentes adicionales
230
  doc_content = read_document(document) if document else ""
231
  audio_content = generator.transcribe_audio(audio) if audio else ""
232
  url_content = read_url(url) if url else ""
233
  social_content = process_social_media(social_url) if social_url else {"text": ""}
234
 
235
+ # Construir prompt estructurado
236
  prompt = f"""
237
+ ## Instrucciones:
238
+ - Tema Principal: {main_input}
239
+ - Datos Proporcionados: {additional_data}
240
+ - Tono Requerido: {tone}
241
 
242
+ ## Fuentes:
243
+ - Documento: {doc_content[:500]}...
244
+ - Audio: {audio_content[:300]}...
245
+ - URL: {url_content[:500]}...
246
+ - Red Social: {social_content['text'][:300]}...
247
 
248
+ ## Requisitos:
249
+ - Estructura profesional (titular, lead, cuerpo)
250
+ - Incluir las 5W
251
+ - Citas relevantes si aplica
252
+ - Longitud: {length} palabras
253
  """
254
 
255
+ return generator.generate_news(prompt, length), "✅ Generación exitosa"
256
 
257
  except Exception as e:
258
  logger.error(str(e))
259
+ return f"Error: {str(e)}", "❌ Error en generación"
260
 
261
  generate_btn.click(
262
  fn=process_and_generate,