# Spaces: Runtime error
import gradio as gr | |
import logging | |
import os | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import whisper | |
from pydub import AudioSegment | |
import requests | |
from bs4 import BeautifulSoup | |
from typing import Optional, Dict, Any | |
from dataclasses import dataclass | |
# Layout of every log line: timestamp, severity, message.
# The original format had a stray ")" after the level name
# ("... - INFO) message"); the separator is now consistent.
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"

# Configure the root logger once at import time; INFO and above are emitted.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
@dataclass
class NewsConfig:
    """Settings for the text-generation model.

    Declared as a dataclass so individual values can be overridden at
    construction time (``NewsConfig(max_tokens=128)``) while ``NewsConfig()``
    keeps returning the defaults below.
    """

    # Hugging Face Hub repository id passed to ``from_pretrained``.
    # NOTE(review): the Llama 2 chat family is usually published in 7B/13B/70B
    # sizes; confirm that this "3b" repository id actually exists.
    model_name: str = "meta-llama/Llama-2-3b-chat-hf"
    # Used both as the tokenizer's ``model_max_length`` and as
    # ``max_new_tokens`` when generating.
    max_tokens: int = 256
    # Sampling temperature forwarded to ``generate``.
    temperature: float = 0.7
    # Nucleus-sampling threshold forwarded to ``generate``.
    top_p: float = 0.95
class NewsGenerator:
    """Hold the language model, its tokenizer and the Whisper speech model.

    The constructor loads all three models eagerly, so creating an instance
    may download weights and will raise if any of them fails to load.
    """

    def __init__(self):
        """Create the configuration and load every model."""
        self.config = NewsConfig()
        self.tokenizer = None
        self.model = None
        self.whisper_model = None
        self._initialize_models()

    def _initialize_models(self):
        """Load whichever of the tokenizer, language model and Whisper model
        is still unset.

        Raises:
            Exception: any loading error is logged and re-raised unchanged.
        """
        try:
            # Explicit ``is None`` checks: truthiness of tokenizer/model
            # objects is not a reliable "not loaded yet" signal.
            if self.tokenizer is None:
                self.tokenizer = AutoTokenizer.from_pretrained(
                    self.config.model_name,
                    use_fast=True,
                    # NOTE(review): this reuses the generation budget as the
                    # tokenizer's maximum input length; confirm it is intended.
                    model_max_length=self.config.max_tokens,
                )
                # Reuse EOS as the padding token.
                self.tokenizer.pad_token = self.tokenizer.eos_token

            if self.model is None:
                self.model = AutoModelForCausalLM.from_pretrained(
                    self.config.model_name,
                    device_map="auto",
                    torch_dtype=torch.float16,
                    low_cpu_mem_usage=True,
                    use_safetensors=True,
                )

            if self.whisper_model is None:
                self.whisper_model = whisper.load_model(
                    "tiny",
                    device="cuda" if torch.cuda.is_available() else "cpu",
                )
        except Exception as e:
            logger.error(f"Error initializing models: {str(e)}")
            raise

    def transcribe_audio(self, audio_file: str) -> str:
        """Transcribe an audio file with Whisper.

        Args:
            audio_file: Path of the file to transcribe.

        Returns:
            The transcribed text, or a human-readable error message when no
            file was given or transcription failed. Never raises.
        """
        if not audio_file:
            return "Error: No audio file provided"
        try:
            result = self.whisper_model.transcribe(audio_file)
            return result.get("text", "Transcription failed")
        except Exception as e:
            logger.error(f"Audio transcription error: {str(e)}")
            return f"Error transcribing audio: {str(e)}"

    def generate_news(self, prompt: str) -> str:
        """Generate text for ``prompt`` with the language model.

        Args:
            prompt: Full instruction prompt sent to the model.

        Returns:
            Only the newly generated text (the prompt is not echoed back), or
            a human-readable error message when generation failed. Never
            raises.
        """
        try:
            encoded = self.tokenizer(prompt, return_tensors="pt")
            # The model is placed by device_map="auto"; the inputs must live
            # on the same device as the model or generation fails on GPU.
            device = self.model.device
            input_ids = encoded["input_ids"].to(device)
            attention_mask = encoded["attention_mask"].to(device)

            with torch.inference_mode():
                outputs = self.model.generate(
                    input_ids=input_ids,
                    # Passed explicitly because pad == eos, so the mask
                    # cannot be inferred from the token ids.
                    attention_mask=attention_mask,
                    max_new_tokens=self.config.max_tokens,
                    temperature=self.config.temperature,
                    top_p=self.config.top_p,
                    do_sample=True,
                    pad_token_id=self.tokenizer.pad_token_id,
                    # early_stopping is a beam-search option and has no
                    # effect when sampling, so it is no longer passed.
                )

            # The returned sequence starts with the prompt tokens; keep only
            # what the model added so the article does not repeat the prompt.
            new_tokens = outputs[0][input_ids.shape[-1]:]
            return self.tokenizer.decode(
                new_tokens, skip_special_tokens=True
            ).strip()
        except Exception as e:
            logger.error(f"News generation error: {str(e)}")
            return f"Error generating news: {str(e)}"
def read_document(document_path: str) -> str:
    """Extract the text of a PDF, DOCX, XLSX or CSV file.

    The file type is chosen from the extension, compared case-insensitively
    so that names such as ``REPORT.PDF`` are accepted. Parser libraries are
    imported lazily, so a missing optional dependency only affects its own
    file type.

    Args:
        document_path: Path of the document on disk.

    Returns:
        The extracted text, ``"Unsupported file type"`` for any other
        extension, or a human-readable error message when reading failed.
        Never raises.
    """
    try:
        lowered = document_path.lower()

        if lowered.endswith(".pdf"):
            # PyMuPDF; the original referenced ``fitz`` without importing it,
            # so every PDF failed with a NameError.
            import fitz

            with fitz.open(document_path) as doc:
                return "\n".join(page.get_text() for page in doc)

        if lowered.endswith(".docx"):
            import docx

            doc = docx.Document(document_path)
            return "\n".join(p.text for p in doc.paragraphs)

        if lowered.endswith(".xlsx"):
            import pandas as pd

            return pd.read_excel(document_path).to_string()

        if lowered.endswith(".csv"):
            import pandas as pd

            return pd.read_csv(document_path).to_string()

        return "Unsupported file type"
    except Exception as e:
        logger.error(f"Document reading error: {str(e)}")
        return f"Error reading document: {str(e)}"
def read_url(url: str) -> str:
    """Download a web page and return its text content.

    The request uses a 10-second timeout and HTTP error statuses are treated
    as failures. Any failure is logged and reported as an error string
    instead of being raised.
    """
    try:
        reply = requests.get(url, timeout=10)
        reply.raise_for_status()
        soup = BeautifulSoup(reply.content, 'html.parser')
        page_text = soup.get_text()
    except Exception as err:
        detail = str(err)
        logger.error(f"URL reading error: {detail}")
        return f"Error reading URL: {detail}"
    return page_text
def process_social_media(url: str) -> Dict[str, Any]:
    """Fetch a social-media URL and wrap its text in a result dictionary.

    The result always has the keys ``"text"`` and ``"video"``; ``"video"`` is
    always ``None``, and ``"text"`` is ``None`` only when fetching raised.
    """
    outcome: Dict[str, Any] = {"text": None, "video": None}
    try:
        outcome["text"] = read_url(url)
    except Exception as err:
        logger.error(f"Social media processing error: {str(err)}")
    return outcome
def main():
    """Main function to create and run the Gradio app.

    Loads the models, builds the interface, wires the "Generar Noticia"
    button to the generation handler and launches the server.
    """
    news_generator = NewsGenerator()

    def _uploaded_path(upload: Any) -> str:
        """Return the filesystem path of an uploaded file, or "" if absent.

        Depending on the Gradio version the handler receives either an object
        exposing ``.name`` or the path itself, so both shapes are accepted.
        """
        if not upload:
            return ""
        return getattr(upload, "name", upload)

    def generate_news_output(
        instrucciones: str,
        hechos: str,
        tamaño: int,
        tono: str,
        documento: Optional[Any],
        audio: Optional[Any],
        url: Optional[str],
        social_url: Optional[str],
    ):
        """Collect every provided source, build the prompt and generate.

        Returns:
            A ``(article, status)`` pair for the two output text boxes. On
            failure both elements carry an error message.
        """
        try:
            # Each source is optional; missing ones contribute an empty string.
            doc_path = _uploaded_path(documento)
            doc_content = read_document(doc_path) if doc_path else ""

            audio_path = _uploaded_path(audio)
            audio_content = (
                news_generator.transcribe_audio(audio_path) if audio_path else ""
            )

            url_content = read_url(url) if url else ""

            if social_url:
                social_content = process_social_media(social_url)
            else:
                social_content = {"text": "", "video": ""}
            # Avoid leaking the literal text "None" into the prompt.
            social_text = social_content.get("text") or ""
            social_video = social_content.get("video") or ""

            prompt = "\n".join(
                [
                    "[INST] Escribe una noticia basada en la siguiente información:",
                    f"Instrucciones: {instrucciones}",
                    f"Hechos: {hechos}",
                    f"Documento: {doc_content}",
                    f"Audio: {audio_content}",
                    f"URL: {url_content}",
                    f"Red Social: {social_text}",
                    f"Video: {social_video}",
                    "Parámetros:",
                    f"- Tamaño: {tamaño} palabras",
                    f"- Tono: {tono}",
                    "- Incluye: Título, gancho, cuerpo, 5W",
                    "- Estilo periodístico",
                    "[/INST]",
                ]
            )

            news = news_generator.generate_news(prompt)
            return news, "Transcripciones generadas correctamente"
        except Exception as e:
            return f"Error generando noticia: {str(e)}", f"Error: {str(e)}"

    with gr.Blocks() as demo:
        gr.Markdown("# Generador de Noticias Optimizado")
        with gr.Row():
            instrucciones = gr.Textbox(label="Instrucciones", lines=2)
            hechos = gr.Textbox(label="Hechos", lines=4)
            # precision=0 makes the component deliver an int, so the prompt
            # reads "100 palabras" rather than "100.0 palabras".
            tamaño = gr.Number(label="Tamaño (palabras)", value=100, precision=0)
            tono = gr.Dropdown(
                label="Tono",
                choices=["serio", "neutral", "divertido"],
                value="neutral",
            )
        with gr.Row():
            # Extensions need a leading dot; bare words are reserved for
            # categories such as "audio" and "video".
            documento = gr.File(
                label="Documento",
                file_types=[".pdf", ".docx", ".xlsx", ".csv"],
            )
            audio = gr.File(label="Audio/Video", file_types=["audio", "video"])
            url = gr.Textbox(label="URL")
            social_url = gr.Textbox(label="URL de red social")
        with gr.Row():
            generar = gr.Button("Generar Noticia")
        noticia = gr.Textbox(label="Noticia Generada", lines=20)
        transcripciones = gr.Textbox(label="Transcripciones", lines=10)

        # Register the handler as a click callback. The original invoked it
        # immediately with the component objects and then tried to call the
        # returned tuple, which raised TypeError while building the UI.
        generar.click(
            fn=generate_news_output,
            inputs=[
                instrucciones,
                hechos,
                tamaño,
                tono,
                documento,
                audio,
                url,
                social_url,
            ],
            outputs=[noticia, transcripciones],
        )

    # The module-level guard already decides when main() runs, so the nested
    # __main__ check around launch() was redundant and has been removed.
    demo.launch()
# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()