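"""News_AI / app.py

Gradio app that turns free-text notes, uploaded documents (PDF/DOCX/XLSX/CSV)
and audio files into a short news-style summary, using facebook/bart-large-cnn
for summarization and OpenAI Whisper for transcription.

Assumed runtime dependencies (inferred from the imports below): gradio, torch,
transformers, openai-whisper, pydub, requests, beautifulsoup4, PyMuPDF,
python-docx and pandas. Whisper and pydub additionally expect ffmpeg to be
available on the system path.
"""
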
import gradio as gr
import logging
import torch
from transformers import pipeline, AutoTokenizer
import whisper
from pydub import AudioSegment
import requests
from bs4 import BeautifulSoup
from typing import Optional
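
# requests and BeautifulSoup are imported for web scraping but are not used
# below; presumably reserved for URL-based article ingestion.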

# Basic logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class NewsGenerator:
    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.whisper_model = None
        self.news_pipeline = None
        self.tokenizer = None

        # Load the models once at construction time
        self._load_models()

    def _load_models(self):
        """Efficient model loading with memory management."""
        try:
            # Comparatively small, efficient summarization model
            model_name = "facebook/bart-large-cnn"
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.news_pipeline = pipeline(
                "summarization",
                model=model_name,
                device=self.device,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
            )

            # Optimized Whisper: English-only "tiny" model on CPU, "small" on GPU
            self.whisper_model = whisper.load_model(
                "tiny.en" if self.device == "cpu" else "small",
                device=self.device
            )
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise
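
    # Whisper loads and resamples audio itself (via ffmpeg); pydub is imported
    # at module level for extra audio preprocessing but is not used here.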
    def transcribe_audio(self, audio_path: str) -> str:
        """Optimized audio transcription."""
        try:
            result = self.whisper_model.transcribe(audio_path)
            return result.get("text", "")
        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return ""
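
    # Note: facebook/bart-large-cnn accepts roughly 1,024 input tokens; longer
    # prompts are cut off by truncation=True in the call below.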
    def generate_news(self, inputs: str, max_length: int = 200) -> str:
        """News generation with resource control."""
        try:
            return self.news_pipeline(
                inputs,
                max_length=int(max_length),  # slider values may arrive as floats
                min_length=30,
                do_sample=False,  # deterministic decoding; better performance
                truncation=True
            )[0]['summary_text']
        except Exception as e:
            logger.error(f"Generation error: {str(e)}")
            return "Error generating content"
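

# Document parsing relies on optional dependencies that are imported lazily:
# PyMuPDF (fitz) for PDF, python-docx for DOCX, and pandas (plus an Excel
# engine such as openpyxl) for XLSX files.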
def read_document(file_path: str) -> str:
    """Optimized document reading."""
    try:
        if file_path.endswith(".pdf"):
            import fitz  # PyMuPDF
            with fitz.open(file_path) as doc:
                return " ".join(page.get_text() for page in doc)
        elif file_path.endswith(".docx"):
            from docx import Document
            return " ".join(p.text for p in Document(file_path).paragraphs)
        elif file_path.endswith((".xlsx", ".csv")):
            import pandas as pd
            if file_path.endswith(".xlsx"):
                return pd.read_excel(file_path).to_string()
            return pd.read_csv(file_path).to_string()
        return ""
    except Exception as e:
        logger.error(f"Document error: {str(e)}")
        return ""
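

# Illustrative programmatic use outside the Gradio UI (hypothetical snippet,
# file name made up):
#
#     generator = NewsGenerator()
#     source = read_document("briefing.pdf") or "Raw notes about the event..."
#     print(generator.generate_news(source, max_length=150))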


def create_interface():
    """Optimized Gradio interface."""
    generator = NewsGenerator()

    with gr.Blocks(title="Efficient News Generator") as app:
        gr.Markdown("## 📰 Optimized News Generator")

        with gr.Row():
            with gr.Column(scale=2):
                inputs = gr.Textbox(label="Main Input", lines=5)
                max_length = gr.Slider(100, 500, value=200, label="Maximum Length")
                generate_btn = gr.Button("Generate News", variant="primary")
            with gr.Column(scale=1):
                doc_upload = gr.File(label="Upload Document", file_types=[".pdf", ".docx", ".xlsx", ".csv"])
                audio_upload = gr.File(label="Upload Audio", file_types=["audio", "video"])
                # gr.File returns a file path string by default (Gradio 4.x),
                # which process_inputs forwards to read_document / Whisper.

        output = gr.Textbox(label="Generated News", lines=10, interactive=False)

        def process_inputs(
            main_input: str,
            document: Optional[str],
            audio: Optional[str],
            max_len: int
        ):
            try:
                # Process document and audio attachments
                doc_content = read_document(document) if document else ""
                audio_content = generator.transcribe_audio(audio) if audio else ""

                # Build the prompt
                full_input = "\n".join([
                    main_input,
                    f"Document: {doc_content}",
                    f"Audio: {audio_content}"
                ])

                return generator.generate_news(full_input, max_len)
            except Exception as e:
                logger.error(f"Processing error: {str(e)}")
                return f"Error: {str(e)}"

        generate_btn.click(
            fn=process_inputs,
            inputs=[inputs, doc_upload, audio_upload, max_length],
            outputs=output
        )

    return app
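

# Note: on constrained hardware, calling app.queue() before launch() can help
# serialize concurrent requests; Gradio's defaults are kept here.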
if __name__ == "__main__":
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",  # bind on all interfaces (required in containers)
        server_port=7860,       # default port expected by Hugging Face Spaces
        share=False,
        show_error=True
    )