import os
import logging

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import whisper
from huggingface_hub import login
import requests
from bs4 import BeautifulSoup
from typing import Optional, Dict, Any
import fitz  # PyMuPDF
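
# Hedged note: on a Hugging Face Space, every package imported here (plus the
# lazy imports further down: python-docx, pandas, openpyxl) must be listed in
# requirements.txt, or the Space dies at startup with exactly the kind of
# "Runtime error" shown on its page. A plausible requirements.txt for this
# app (an assumption, not verified against the Space):
#   gradio
#   torch
#   transformers
#   accelerate
#   bitsandbytes
#   openai-whisper
#   requests
#   beautifulsoup4
#   PyMuPDF
#   python-docx
#   pandas
#   openpyxl
# Whisper also needs the ffmpeg binary, typically via a packages.txt entry.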

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Hugging Face authentication: read the token from the environment instead of
# hardcoding it (the Llama-2 checkpoint is gated, so the token needs access)
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

class NewsGenerator:
    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.whisper_model = None
        self.llm_model = None
        self.tokenizer = None
        self._load_models()

    def _load_models(self):
        """Optimized model loading with 4-bit quantization"""
        try:
            # Llama-2 7B Chat model
            model_name = "meta-llama/Llama-2-7b-chat-hf"
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                use_fast=True,
                token=HF_TOKEN
            )
            # A bare load_in_4bit kwarg is deprecated in recent transformers
            # releases; BitsAndBytesConfig is the supported way to request it
            self.llm_model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                torch_dtype=torch.float16,
                quantization_config=BitsAndBytesConfig(load_in_4bit=True),
                low_cpu_mem_usage=True,
                token=HF_TOKEN
            )

            # Whisper: multilingual "small" on CPU, larger "medium" on GPU
            self.whisper_model = whisper.load_model(
                "small" if self.device == "cpu" else "medium",
                device=self.device
            )
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise
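
    # Hedged note: 4-bit loading requires a CUDA GPU and the bitsandbytes
    # package, so model loading raises on CPU-only hardware. A minimal
    # CPU-only fallback (a sketch, not part of the original design) would
    # skip quantization entirely:
    #   self.llm_model = AutoModelForCausalLM.from_pretrained(
    #       model_name, torch_dtype=torch.float32,
    #       low_cpu_mem_usage=True, token=HF_TOKEN)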

    def transcribe_audio(self, audio_path: str) -> str:
        """Audio transcription with error handling"""
        try:
            result = self.whisper_model.transcribe(audio_path)
            return result.get("text", "")
        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return ""

    def generate_news(self, prompt: str, max_length: int = 512) -> str:
        """News generation with Llama-2"""
        try:
            # Llama-2 chat format: [INST] <<SYS>> system <</SYS>> user [/INST]
            inputs = self.tokenizer(
                f"[INST] <<SYS>>\nYou are a professional journalist. Write a well-structured news article based on the following data.\n<</SYS>>\n\n{prompt} [/INST]",
                return_tensors="pt"
            ).to(self.device)
            outputs = self.llm_model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )
            # Decode only the newly generated tokens, not the echoed prompt
            new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
            return self.tokenizer.decode(new_tokens, skip_special_tokens=True)
        except Exception as e:
            logger.error(f"News generation error: {str(e)}")
            return "Generation error"

def read_document(file_path: str) -> str:
    """Optimized document reading"""
    try:
        if file_path.endswith(".pdf"):
            with fitz.open(file_path) as doc:
                return " ".join(page.get_text() for page in doc)
        elif file_path.endswith(".docx"):
            from docx import Document
            return " ".join(p.text for p in Document(file_path).paragraphs)
        elif file_path.endswith(".xlsx"):
            import pandas as pd
            return pd.read_excel(file_path).to_string()
        elif file_path.endswith(".csv"):
            import pandas as pd
            return pd.read_csv(file_path).to_string()
        return ""
    except Exception as e:
        logger.error(f"Document reading error: {str(e)}")
        return ""

def read_url(url: str) -> str:
    """Web content extraction"""
    try:
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        return BeautifulSoup(response.content, 'html.parser').get_text(separator=' ', strip=True)
    except Exception as e:
        logger.error(f"URL reading error: {str(e)}")
        return ""

def process_social_media(url: str) -> Dict[str, Any]:
    """Social media content processing"""
    try:
        text = read_url(url)
        return {"text": text, "video": None}
    except Exception as e:
        logger.error(f"Social media processing error: {str(e)}")
        return {"text": "", "video": None}

def create_interface():
    """Gradio user interface"""
    generator = NewsGenerator()

    with gr.Blocks(title="AI News Generator", theme=gr.themes.Soft()) as app:
        gr.Markdown("# 📰 Professional News Generator")
        with gr.Row():
            with gr.Column(scale=3):
                main_input = gr.Textbox(
                    label="Main topic",
                    placeholder="Enter the main topic or instructions...",
                    lines=3
                )
                additional_data = gr.Textbox(
                    label="Additional data",
                    placeholder="Key facts, names, dates, etc...",
                    lines=3
                )
                with gr.Accordion("Additional sources", open=False):
                    doc_upload = gr.File(
                        label="Upload document",
                        file_types=[".pdf", ".docx", ".xlsx", ".csv"],
                        type="filepath"
                    )
                    audio_upload = gr.File(
                        label="Upload audio/video",
                        file_types=["audio", "video"],
                        type="filepath"
                    )
                    url_input = gr.Textbox(
                        label="Reference URL",
                        placeholder="https://..."
                    )
                    social_input = gr.Textbox(
                        label="Social media URL",
                        placeholder="https://..."
                    )
                length_slider = gr.Slider(
                    100, 1000, value=400,
                    label="Article length (words)"
                )
                tone_select = gr.Dropdown(
                    label="Journalistic tone",
                    choices=["Formal", "Neutral", "Investigative", "Narrative"],
                    value="Neutral"
                )
            with gr.Column(scale=2):
                output_news = gr.Textbox(
                    label="Generated article",
                    lines=18,
                    interactive=False
                )
                generate_btn = gr.Button("Generate Article", variant="primary")
                status = gr.Textbox(label="Status", interactive=False)

        def process_and_generate(
            main_input: str,
            additional_data: str,
            document: Optional[str],
            audio: Optional[str],
            url: Optional[str],
            social_url: Optional[str],
            length: int,
            tone: str
        ):
            try:
                # Process additional sources
                doc_content = read_document(document) if document else ""
                audio_content = generator.transcribe_audio(audio) if audio else ""
                url_content = read_url(url) if url else ""
                social_content = process_social_media(social_url) if social_url else {"text": ""}

                # Build a structured prompt
                prompt = f"""
                ## Instructions:
                - Main topic: {main_input}
                - Provided data: {additional_data}
                - Required tone: {tone}

                ## Sources:
                - Document: {doc_content[:1000]}...
                - Audio: {audio_content[:500]}...
                - URL: {url_content[:1000]}...
                - Social media: {social_content['text'][:500]}...

                ## Requirements:
                - Professional structure (headline, lead, body)
                - Cover the five W's
                - Relevant quotes where applicable
                - Length: {length} words
                """

                # Rough words-to-tokens conversion (assumption: ~1.5 tokens
                # per word for Llama-2; the slider is labeled in words but
                # generate_news budgets in tokens)
                max_tokens = int(length * 1.5)
                return generator.generate_news(prompt, max_tokens), "✅ Generation successful"
            except Exception as e:
                logger.error(str(e))
                return f"Error: {str(e)}", "❌ Generation error"

        generate_btn.click(
            fn=process_and_generate,
            inputs=[
                main_input,
                additional_data,
                doc_upload,
                audio_upload,
                url_input,
                social_input,
                length_slider,
                tone_select
            ],
            outputs=[output_news, status]
        )

    return app

if __name__ == "__main__":
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )
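
# Hedged deployment notes (assumptions, not verified against the Space):
# locally, export the token and run the script directly:
#   HF_TOKEN=hf_... python app.py
# On Hugging Face Spaces, set HF_TOKEN as a repository secret instead;
# 0.0.0.0:7860 is the host/port combination Spaces expects.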