Spaces:

CamiloVega
/

News_AI

Runtime error

App Files Files Community

News_AI / app.py

CamiloVega

Update app.py

e4cde65 verified 16 days ago

raw

history blame

8.57 kB

	import gradio as gr
	import logging
	import os
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import whisper
	from pydub import AudioSegment
	import requests
	from bs4 import BeautifulSoup
	from typing import Optional, Dict, Any
	from dataclasses import dataclass

	# Configure root logging once at import time: INFO level, timestamped records.
	logging.basicConfig(
	    level=logging.INFO,
	    # Fields are separated uniformly by " - "; a stray ")" after the level
	    # name previously produced records such as "... - INFO) message".
	    format='%(asctime)s - %(levelname)s - %(message)s'
	)
	# Module-level logger shared by every function and class in this file.
	logger = logging.getLogger(__name__)

	@dataclass
	class NewsConfig:
	    """Settings read by NewsGenerator when loading the model and generating text."""
	    # Hugging Face model id passed to from_pretrained for both tokenizer and model.
	    # NOTE(review): Llama 2 chat checkpoints are, as far as I know, published in
	    # 7b/13b/70b sizes only - confirm that this "3b" repo id actually exists.
	    model_name: str = "meta-llama/Llama-2-3b-chat-hf"
	    # Used both as the tokenizer's model_max_length and as max_new_tokens.
	    max_tokens: int = 256
	    # Sampling temperature forwarded to model.generate.
	    temperature: float = 0.7
	    # Nucleus-sampling threshold forwarded to model.generate.
	    top_p: float = 0.95

	class NewsGenerator:
	    """Bundle the causal language model, its tokenizer and a Whisper model.

	    All three models are loaded eagerly by the constructor, so creating an
	    instance is expensive and raises if any of them fails to load.
	    """

	    def __init__(self):
	        self.config = NewsConfig()
	        self.tokenizer = None
	        self.model = None
	        self.whisper_model = None
	        self._initialize_models()

	    def _initialize_models(self):
	        """Load every model that has not been loaded yet.

	        Raises:
	            Exception: whatever the underlying loader raised; it is logged
	                and then re-raised unchanged.
	        """
	        try:
	            # Explicit None checks: a tokenizer defines __len__, so relying
	            # on truthiness would depend on its vocabulary size.
	            if self.tokenizer is None:
	                self.tokenizer = AutoTokenizer.from_pretrained(
	                    self.config.model_name,
	                    use_fast=True,
	                    # NOTE(review): this reuses the generation budget as the
	                    # tokenizer's maximum input length - confirm intended.
	                    model_max_length=self.config.max_tokens
	                )
	                # Reuse EOS as the padding token.
	                self.tokenizer.pad_token = self.tokenizer.eos_token

	            if self.model is None:
	                self.model = AutoModelForCausalLM.from_pretrained(
	                    self.config.model_name,
	                    device_map="auto",
	                    torch_dtype=torch.float16,
	                    low_cpu_mem_usage=True,
	                    use_safetensors=True
	                )

	            if self.whisper_model is None:
	                self.whisper_model = whisper.load_model(
	                    "tiny",
	                    device="cuda" if torch.cuda.is_available() else "cpu"
	                )

	        except Exception as e:
	            logger.exception("Error initializing models: %s", e)
	            raise

	    def transcribe_audio(self, audio_file: str) -> str:
	        """Transcribe an audio file with the loaded Whisper model.

	        Args:
	            audio_file: Path of the file to transcribe.

	        Returns:
	            The transcribed text, or a human-readable error message when no
	            file was given or the transcription failed (never raises).
	        """
	        if not audio_file:
	            return "Error: No audio file provided"

	        try:
	            result = self.whisper_model.transcribe(audio_file)
	            return result.get("text", "Transcription failed")
	        except Exception as e:
	            logger.exception("Audio transcription error: %s", e)
	            return f"Error transcribing audio: {str(e)}"

	    def generate_news(self, prompt: str) -> str:
	        """Generate an article for ``prompt`` by sampling from the model.

	        Args:
	            prompt: Full instruction text fed to the model.

	        Returns:
	            Only the newly generated text (the prompt is not echoed back),
	            or a human-readable error message on failure (never raises).
	        """
	        try:
	            encoded = self.tokenizer(prompt, return_tensors="pt")
	            # The model is placed with device_map="auto"; inputs must live on
	            # the same device as the model or generate() fails on GPU hosts.
	            device = self.model.device
	            input_ids = encoded.input_ids.to(device)
	            attention_mask = encoded.attention_mask.to(device)

	            with torch.inference_mode():
	                # early_stopping is a beam-search-only flag, so it is not
	                # passed when sampling.
	                outputs = self.model.generate(
	                    inputs=input_ids,
	                    attention_mask=attention_mask,
	                    max_new_tokens=self.config.max_tokens,
	                    temperature=self.config.temperature,
	                    top_p=self.config.top_p,
	                    do_sample=True
	                )

	            # A decoder-only model returns prompt + continuation; drop the
	            # prompt tokens so the article does not start with the prompt.
	            new_tokens = outputs[0][input_ids.shape[-1]:]
	            return self.tokenizer.decode(new_tokens, skip_special_tokens=True)
	        except Exception as e:
	            logger.exception("News generation error: %s", e)
	            return f"Error generating news: {str(e)}"

	def read_document(document_path: str) -> str:
	    """Extract the text of a PDF, DOCX, XLSX or CSV file.

	    Args:
	        document_path: Path of the document; the type is chosen from the
	            file extension, compared case-insensitively.

	    Returns:
	        The extracted text, ``"Unsupported file type"`` for any other
	        extension, or a human-readable error message if reading failed
	        (never raises).
	    """
	    try:
	        # Lower-case once so "REPORT.PDF" is treated like "report.pdf".
	        lowered = document_path.lower()

	        if lowered.endswith(".pdf"):
	            # PyMuPDF is imported here, like the other format-specific
	            # readers below; without this import the PDF branch always
	            # failed with a NameError on ``fitz``.
	            import fitz
	            with fitz.open(document_path) as doc:
	                return "\n".join(page.get_text() for page in doc)

	        if lowered.endswith(".docx"):
	            import docx
	            document = docx.Document(document_path)
	            return "\n".join(p.text for p in document.paragraphs)

	        if lowered.endswith(".xlsx"):
	            import pandas as pd
	            return pd.read_excel(document_path).to_string()

	        if lowered.endswith(".csv"):
	            import pandas as pd
	            return pd.read_csv(document_path).to_string()

	        return "Unsupported file type"
	    except Exception as e:
	        logger.exception("Document reading error: %s", e)
	        return f"Error reading document: {str(e)}"

	def read_url(url: str) -> str:
	    """Download ``url`` and return the text BeautifulSoup extracts from it.

	    The request uses a 10 second timeout and HTTP error statuses are treated
	    as failures. Any failure is logged and reported as an error-message
	    string instead of being raised.
	    """
	    try:
	        page = requests.get(url, timeout=10)
	        page.raise_for_status()
	        soup = BeautifulSoup(page.content, 'html.parser')
	        return soup.get_text()
	    except Exception as err:
	        logger.error(f"URL reading error: {str(err)}")
	        return f"Error reading URL: {str(err)}"

	def process_social_media(url: str) -> Dict[str, Any]:
	    """Fetch a social-media page and package its text for the prompt.

	    Returns a dict with the keys ``"text"`` (page text from ``read_url``) and
	    ``"video"`` (always ``None``). If an exception escapes, it is logged and
	    both values are ``None``.
	    """
	    try:
	        payload = {"text": read_url(url), "video": None}
	    except Exception as err:
	        logger.error(f"Social media processing error: {str(err)}")
	        payload = {"text": None, "video": None}
	    return payload

	def _uploaded_path(upload) -> str:
	    """Return the filesystem path behind a value delivered by a gr.File input.

	    Depending on the Gradio version and the component's ``type`` setting the
	    callback receives either a plain path string or a temp-file wrapper that
	    exposes the path as ``.name``; both forms are accepted here.
	    """
	    return getattr(upload, "name", upload)


	def main():
	    """Build the Gradio interface, wire the button to the generator and launch it.

	    Loads the models first (via ``NewsGenerator()``), so any model-loading
	    failure propagates before the UI is created.
	    """
	    news_generator = NewsGenerator()

	    with gr.Blocks() as demo:
	        gr.Markdown("# Generador de Noticias Optimizado")

	        with gr.Row():
	            instrucciones = gr.Textbox(label="Instrucciones", lines=2)
	            hechos = gr.Textbox(label="Hechos", lines=4)
	            # precision=0 makes the component deliver an int, so the prompt
	            # reads "100 palabras" rather than "100.0 palabras".
	            tamaño = gr.Number(label="Tamaño (palabras)", value=100, precision=0)
	            tono = gr.Dropdown(label="Tono", choices=["serio", "neutral", "divertido"], value="neutral")

	        with gr.Row():
	            # Extensions need their leading dot; bare words are reserved for
	            # categories such as "audio" and "video".
	            documento = gr.File(label="Documento", file_types=[".pdf", ".docx", ".xlsx", ".csv"])
	            audio = gr.File(label="Audio/Video", file_types=["audio", "video"])
	            url = gr.Textbox(label="URL")
	            social_url = gr.Textbox(label="URL de red social")

	        with gr.Row():
	            generar = gr.Button("Generar Noticia")
	            noticia = gr.Textbox(label="Noticia Generada", lines=20)
	            transcripciones = gr.Textbox(label="Transcripciones", lines=10)

	        def generate_news_output(
	            instrucciones: str,
	            hechos: str,
	            tamaño: int,
	            tono: str,
	            documento: Optional[gr.File],
	            audio: Optional[gr.File],
	            url: Optional[str],
	            social_url: Optional[str]
	        ):
	            """Collect every provided source, build the prompt and generate.

	            Returns a ``(article, status)`` pair for the two output boxes;
	            on failure both carry an error message instead of raising.
	            """
	            try:
	                # Each optional source contributes an empty string when absent.
	                doc_content = read_document(_uploaded_path(documento)) if documento else ""
	                audio_content = (
	                    news_generator.transcribe_audio(_uploaded_path(audio)) if audio else ""
	                )
	                url_content = read_url(url) if url else ""
	                social_content = process_social_media(social_url) if social_url else {}

	                # Missing values become "" so the literal text "None" never
	                # ends up inside the prompt.
	                social_text = social_content.get("text") or ""
	                social_video = social_content.get("video") or ""

	                prompt = f"""[INST] Escribe una noticia basada en la siguiente información:
	Instrucciones: {instrucciones}
	Hechos: {hechos}
	Documento: {doc_content}
	Audio: {audio_content}
	URL: {url_content}
	Red Social: {social_text}
	Video: {social_video}

	Parámetros:
	- Tamaño: {tamaño} palabras
	- Tono: {tono}
	- Incluye: Título, gancho, cuerpo, 5W
	- Estilo periodístico
	[/INST]"""

	                news = news_generator.generate_news(prompt)
	                return news, "Transcripciones generadas correctamente"

	            except Exception as e:
	                logger.exception("News output error: %s", e)
	                return f"Error generando noticia: {str(e)}", f"Error: {str(e)}"

	        # Register the handler as the button's click listener. Calling the
	        # handler directly with the component objects would run it once at
	        # build time and then fail when its returned tuple is called.
	        generar.click(
	            fn=generate_news_output,
	            inputs=[
	                instrucciones,
	                hechos,
	                tamaño,
	                tono,
	                documento,
	                audio,
	                url,
	                social_url
	            ],
	            outputs=[noticia, transcripciones]
	        )

	    # main() is itself only invoked from the module's entry-point guard, so
	    # no additional __name__ check is needed before launching.
	    demo.launch()

	# Script entry point: build and launch the app only when this file is run
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()