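"""AI News Generator - Gradio app for a Hugging Face Space.

Assumed dependencies (a sketch; check the Space's requirements.txt):
gradio, torch, transformers, openai-whisper, huggingface_hub, PyMuPDF.
The HUGGINGFACE_TOKEN secret must be configured in the Space settings,
otherwise startup fails (see the check below).
"""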
import gradio as gr
import logging
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import whisper
from huggingface_hub import login
from typing import Optional
import fitz  # PyMuPDF, used for PDF text extraction
import os
# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Get the Hugging Face token (configured as a Space secret)
HUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
if not HUGGINGFACE_TOKEN:
    logger.error("HUGGINGFACE_TOKEN not found in environment variables")
    raise ValueError("Please configure HUGGINGFACE_TOKEN in environment variables")

# Hugging Face authentication
login(token=HUGGINGFACE_TOKEN)
class NewsGenerator:
    def __init__(self):
        # Run inference on CPU with autograd disabled
        self.device = "cpu"
        torch.set_grad_enabled(False)

        # Clear CUDA memory if a GPU happens to be present
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        self.whisper_model = None
        self.llm_model = None
        self.tokenizer = None
        logger.info(f"Using device: {self.device.upper()}")
        self._load_models()
    def _load_models(self):
        """Optimized model loading for CPU with memory management"""
        try:
            # DeepSeek model configuration
            model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

            # Load tokenizer with the fast (Rust) implementation
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                use_fast=True,
                token=HUGGINGFACE_TOKEN
            )

            # Load model with memory optimizations
            self.llm_model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="cpu",
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True,
                token=HUGGINGFACE_TOKEN
            ).eval()

            # Load Whisper with minimal footprint (English-only tiny model)
            self.whisper_model = whisper.load_model(
                "tiny.en",
                device=self.device
            )

            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise
    def transcribe_audio(self, audio_path: str) -> str:
        """Audio transcription with error handling.

        Whisper decodes the input through ffmpeg, so common audio and
        video containers are both accepted here.
        """
        try:
            result = self.whisper_model.transcribe(audio_path)
            return result.get("text", "")
        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return ""
    def generate_news(self, prompt: str, max_new_tokens: int = 512) -> str:
        """News generation with DeepSeek"""
        try:
            formatted_prompt = (
                f"<|System|>\nYou are a professional journalist. Generate a news article "
                f"based on this data:\n{prompt}\n<|End|>\n"
                f"<|User|>\nWrite the article:<|End|>\n<|Assistant|>"
            )

            inputs = self.tokenizer(
                formatted_prompt,
                return_tensors="pt",
                truncation=True,
                max_length=1024
            ).to(self.device)

            with torch.inference_mode():
                outputs = self.llm_model.generate(
                    **inputs,
                    max_new_tokens=max_new_tokens,
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # Decode only the newly generated tokens so the prompt is not
            # echoed back at the start of the article
            generated = outputs[0][inputs["input_ids"].shape[1]:]
            return self.tokenizer.decode(generated, skip_special_tokens=True)
        except Exception as e:
            logger.error(f"Generation error: {str(e)}")
            return "Error generating the article"
# Initialize generator at module level
generator = NewsGenerator()
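# Quick smoke test of the generator (hypothetical values, handy when
# debugging locally without the Gradio UI):
#   article = generator.generate_news("Topic: city budget vote\nTone: Neutral", 200)
#   print(article)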
# Create the demo interface
demo = gr.Blocks(
    title="AI News Generator",
    theme=gr.themes.Soft(),
    css="footer {display: none !important}"
)

with demo:
    gr.Markdown("""
    # 📰 AI News Generator
    Transform raw data into professional news articles using AI.
    """)

    with gr.Row():
        with gr.Column(scale=3):
            main_input = gr.Textbox(
                label="Main Topic",
                placeholder="Enter the main topic...",
                lines=3
            )
            additional_data = gr.Textbox(
                label="Additional Data",
                placeholder="Key facts...",
                lines=3
            )

            with gr.Accordion("Additional Sources", open=False):
                doc_upload = gr.File(
                    label="Upload Document",
                    file_types=[".pdf", ".docx", ".xlsx", ".csv"]
                )
                audio_upload = gr.File(
                    label="Upload Audio/Video",
                    file_types=["audio", "video"]
                )

            length_slider = gr.Slider(
                100, 1000,
                value=400,
                label="Article Length (words)"
            )
            tone_select = gr.Dropdown(
                label="Journalistic Tone",
                choices=["Formal", "Neutral", "Investigative", "Narrative"],
                value="Neutral"
            )

        with gr.Column(scale=2):
            output_news = gr.Textbox(
                label="Generated Article",
                lines=18,
                interactive=False
            )
            generate_btn = gr.Button("Generate Article", variant="primary")
            status = gr.Textbox(label="Status", interactive=False)
    def process_and_generate(
        main_input: str,
        additional_data: str,
        document: Optional[str],
        audio: Optional[str],
        length: int,
        tone: str
    ):
        try:
            # Extract text from the uploaded document, if any
            doc_content = ""
            if document:
                doc_content = read_document(document)

            # Transcribe the uploaded audio/video, if any
            audio_content = ""
            if audio:
                audio_content = generator.transcribe_audio(audio)

            # Build the prompt, truncating long sources to stay within
            # the tokenizer's 1024-token input limit
            prompt = f"""
            Topic: {main_input}
            Data: {additional_data}
            Tone: {tone}
            Document Content: {doc_content[:500]}
            Audio Content: {audio_content[:300]}
            Length: {length} words
            """

            # The slider is in words; ~1.5 tokens per word is a rough
            # heuristic for sizing the generation budget
            max_tokens = int(length * 1.5)

            return generator.generate_news(prompt, max_tokens), "✅ Generation successful"
        except Exception as e:
            logger.error(str(e))
            return f"Error: {str(e)}", "❌ Generation error"
    generate_btn.click(
        fn=process_and_generate,
        inputs=[
            main_input,
            additional_data,
            doc_upload,
            audio_upload,
            length_slider,
            tone_select
        ],
        outputs=[output_news, status]
    )
if __name__ == "__main__":
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860
    )