# News_AI / app.py
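# Dependencies inferred from the imports below (this list is an assumption, not a
# checked requirements file): gradio, torch, numpy, transformers, accelerate,
# bitsandbytes, openai-whisper, huggingface_hub, pydub, requests, beautifulsoup4,
# PyMuPDF, python-docx, pandas, openpyxl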
import os
import gradio as gr
import logging
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import whisper
from huggingface_hub import login
from pydub import AudioSegment
import requests
from bs4 import BeautifulSoup
from typing import Optional, Dict, Any
import fitz # PyMuPDF
# Logging configuration
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Hugging Face authentication (read the token from the environment instead of hard-coding it)
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
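# Note: on Hugging Face Spaces, HF_TOKEN can be stored as a repository secret
# (Settings -> Variables and secrets); secrets are exposed to the app as
# environment variables, so os.getenv("HF_TOKEN") picks it up automatically.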
class NewsGenerator:
def __init__(self):
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.whisper_model = None
self.llm_model = None
self.tokenizer = None
self._load_models()
    def _load_models(self):
        """Load the models, using 4-bit quantization when a GPU is available."""
        try:
            # Llama-2 7B Chat model
            model_name = "meta-llama/Llama-2-7b-chat-hf"
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                use_fast=True,
                token=HF_TOKEN
            )

            # 4-bit quantization via bitsandbytes (only meaningful on CUDA)
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16
            ) if self.device == "cuda" else None

            self.llm_model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                torch_dtype=torch.float16,
                quantization_config=quantization_config,
                low_cpu_mem_usage=True,
                token=HF_TOKEN
            )

            # Whisper configuration: smaller model on CPU, larger on GPU
            self.whisper_model = whisper.load_model(
                "small.en" if self.device == "cpu" else "medium",
                device=self.device
            )
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise
    def transcribe_audio(self, audio_path: str) -> str:
        """Audio transcription with error handling."""
        try:
            result = self.whisper_model.transcribe(audio_path)
            return result.get("text", "")
        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return ""
    def generate_news(self, prompt: str, max_length: int = 512) -> str:
        """News generation with Llama-2."""
        try:
            # Llama-2 chat format: [INST] <<SYS>> system prompt <</SYS>> user prompt [/INST]
            inputs = self.tokenizer(
                f"[INST] <<SYS>>\nYou are a professional journalist. Write a well-structured news article based on the following data.\n<</SYS>>\n\n{prompt} [/INST]",
                return_tensors="pt"
            ).to(self.device)
            outputs = self.llm_model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )
            # Decode only the newly generated tokens so the prompt is not echoed back
            generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
            return self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
        except Exception as e:
            logger.error(f"Error generating article: {str(e)}")
            return "Generation error"
def read_document(file_path: str) -> str:
    """Read text from PDF, DOCX, XLSX, or CSV documents."""
    try:
        if file_path.endswith(".pdf"):
            with fitz.open(file_path) as doc:
                return " ".join(page.get_text() for page in doc)
        elif file_path.endswith(".docx"):
            from docx import Document
            return " ".join(p.text for p in Document(file_path).paragraphs)
        elif file_path.endswith(".xlsx"):
            import pandas as pd
            return pd.read_excel(file_path).to_string()
        elif file_path.endswith(".csv"):
            import pandas as pd
            return pd.read_csv(file_path).to_string()
        return ""
    except Exception as e:
        logger.error(f"Error reading document: {str(e)}")
        return ""
def read_url(url: str) -> str:
    """Extract text content from a web page."""
    try:
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        return BeautifulSoup(response.content, 'html.parser').get_text(separator=' ', strip=True)
    except Exception as e:
        logger.error(f"Error reading URL: {str(e)}")
        return ""
def process_social_media(url: str) -> Dict[str, Any]:
    """Process social media content (text only for now; no video download)."""
    try:
        text = read_url(url)
        return {"text": text, "video": None}
    except Exception as e:
        logger.error(f"Error processing social media content: {str(e)}")
        return {"text": "", "video": None}
def create_interface():
    """Gradio user interface."""
    generator = NewsGenerator()

    with gr.Blocks(title="AI News Generator", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 📰 Professional News Generator")

        with gr.Row():
            with gr.Column(scale=3):
                main_input = gr.Textbox(
                    label="Main topic",
                    placeholder="Enter the main topic or instructions...",
                    lines=3
                )
                additional_data = gr.Textbox(
                    label="Additional data",
                    placeholder="Key facts, names, dates, etc...",
                    lines=3
                )

                with gr.Accordion("Additional sources", open=False):
                    doc_upload = gr.File(
                        label="Upload document",
                        file_types=[".pdf", ".docx", ".xlsx", ".csv"]
                    )
                    audio_upload = gr.File(
                        label="Upload audio/video",
                        file_types=["audio", "video"]
                    )
                    url_input = gr.Textbox(
                        label="Reference URL",
                        placeholder="https://..."
                    )
                    social_input = gr.Textbox(
                        label="Social media URL",
                        placeholder="https://..."
                    )

                length_slider = gr.Slider(
                    100, 1000, value=400,
                    label="Article length (words)"
                )
                tone_select = gr.Dropdown(
                    label="Journalistic tone",
                    choices=["Formal", "Neutral", "Investigative", "Narrative"],
                    value="Neutral"
                )

            with gr.Column(scale=2):
                output_news = gr.Textbox(
                    label="Generated article",
                    lines=18,
                    interactive=False
                )
                generate_btn = gr.Button("Generate Article", variant="primary")
                status = gr.Textbox(label="Status", interactive=False)
        def process_and_generate(
            main_input: str,
            additional_data: str,
            document: Optional[str],
            audio: Optional[str],
            url: Optional[str],
            social_url: Optional[str],
            length: int,
            tone: str
        ):
            try:
                # Gather content from the optional sources
                doc_content = read_document(document) if document else ""
                audio_content = generator.transcribe_audio(audio) if audio else ""
                url_content = read_url(url) if url else ""
                social_content = process_social_media(social_url) if social_url else {"text": ""}

                # Build a structured prompt; source excerpts are truncated so the
                # prompt stays within the model's context window
                prompt = f"""
                ## Instructions:
                - Main topic: {main_input}
                - Provided data: {additional_data}
                - Required tone: {tone}

                ## Sources:
                - Document: {doc_content[:1000]}...
                - Audio: {audio_content[:500]}...
                - URL: {url_content[:1000]}...
                - Social media: {social_content['text'][:500]}...

                ## Requirements:
                - Professional structure (headline, lead, body)
                - Cover the 5 Ws
                - Relevant quotes where applicable
                - Length: {length} words
                """
                # The requested word count doubles as the max_new_tokens budget (rough approximation)
                return generator.generate_news(prompt, length), "✅ Generation successful"
            except Exception as e:
                logger.error(str(e))
                return f"Error: {str(e)}", "❌ Generation error"
generate_btn.click(
fn=process_and_generate,
inputs=[
main_input,
additional_data,
doc_upload,
audio_upload,
url_input,
social_input,
length_slider,
tone_select
],
outputs=[output_news, status]
)
return app
if __name__ == "__main__":
app = create_interface()
app.launch(
server_name="0.0.0.0",
server_port=7860,
share=False,
show_error=True
)
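# Local usage (a sketch, assuming the dependencies listed at the top are installed
# and HF_TOKEN is set in the environment): run `python app.py` and open
# http://localhost:7860. On a Hugging Face Space using the Gradio SDK, app.py is
# launched automatically.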