import gradio as gr
import logging
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import whisper
from huggingface_hub import login
from typing import Optional
import fitz  # PyMuPDF
import os
# Logging configuration
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Get Hugging Face token
HUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
if not HUGGINGFACE_TOKEN:
    logger.error("HUGGINGFACE_TOKEN not found in environment variables")
    raise ValueError("Please configure HUGGINGFACE_TOKEN in environment variables")
# Hugging Face authentication
login(token=HUGGINGFACE_TOKEN)
class NewsGenerator:
def __init__(self):
# Optimize for CPU
self.device = "cpu"
torch.set_grad_enabled(False)
        # Free any cached CUDA memory in case a GPU is present
if torch.cuda.is_available():
torch.cuda.empty_cache()
self.whisper_model = None
self.llm_model = None
self.tokenizer = None
logger.info(f"Using device: {self.device.upper()}")
self._load_models()
def _load_models(self):
"""Optimized model loading for CPU with memory management"""
try:
# DeepSeek model configuration
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Load tokenizer with optimizations
self.tokenizer = AutoTokenizer.from_pretrained(
model_name,
use_fast=True,
token=HUGGINGFACE_TOKEN
)
# Load model with memory optimizations
self.llm_model = AutoModelForCausalLM.from_pretrained(
model_name,
device_map="cpu",
torch_dtype=torch.float32,
low_cpu_mem_usage=True,
token=HUGGINGFACE_TOKEN
).eval()
# Load Whisper with minimal footprint
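            # "tiny.en" is English-only; "tiny" gives multilingual
            # transcription at a similar footprint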
self.whisper_model = whisper.load_model(
"tiny.en",
device=self.device
)
logger.info("Models loaded successfully")
except Exception as e:
logger.error(f"Error loading models: {str(e)}")
raise
def transcribe_audio(self, audio_path: str) -> str:
"""Audio transcription with error handling"""
try:
            # fp16=False silences the "FP16 is not supported on CPU" warning
            result = self.whisper_model.transcribe(audio_path, fp16=False)
return result.get("text", "")
except Exception as e:
logger.error(f"Transcription error: {str(e)}")
return ""
def generate_news(self, prompt: str, max_length: int = 512) -> str:
"""News generation with DeepSeek"""
try:
            # These role markers loosely approximate DeepSeek's chat format;
            # for a prompt guaranteed to match the model's training template,
            # tokenizer.apply_chat_template is the more robust route.
            formatted_prompt = (
                f"<|System|>\nYou are a professional journalist. Generate a news article "
                f"based on this data:\n{prompt}\n<|End|>\n"
                f"<|User|>\nWrite the article:<|End|>\n<|Assistant|>"
            )
inputs = self.tokenizer(
formatted_prompt,
return_tensors="pt",
truncation=True,
max_length=1024
).to(self.device)
with torch.inference_mode():
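                # Moderate temperature/top-p sampling keeps the prose varied
                # without drifting from the supplied facts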
outputs = self.llm_model.generate(
**inputs,
max_new_tokens=max_length,
temperature=0.7,
top_p=0.9,
do_sample=True,
pad_token_id=self.tokenizer.eos_token_id
)
            # Decode only the newly generated tokens so the prompt is not
            # echoed back in the returned article
            new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
            return self.tokenizer.decode(new_tokens, skip_special_tokens=True)
except Exception as e:
logger.error(f"Generation error: {str(e)}")
return "Error generating the article"
# Initialize generator at module level
generator = NewsGenerator()
# Create the demo interface
demo = gr.Blocks(
title="AI News Generator",
theme=gr.themes.Soft(),
css="footer {display: none !important}"
)
with demo:
gr.Markdown("""
# πŸ“° AI News Generator
Transform raw data into professional news articles using AI.
""")
with gr.Row():
with gr.Column(scale=3):
main_input = gr.Textbox(
label="Main Topic",
placeholder="Enter the main topic...",
lines=3
)
additional_data = gr.Textbox(
label="Additional Data",
placeholder="Key facts...",
lines=3
)
with gr.Accordion("Additional Sources", open=False):
doc_upload = gr.File(
label="Upload Document",
file_types=[".pdf", ".docx", ".xlsx", ".csv"]
)
audio_upload = gr.File(
label="Upload Audio/Video",
file_types=["audio", "video"]
)
length_slider = gr.Slider(
100, 1000,
value=400,
label="Article Length (words)"
)
tone_select = gr.Dropdown(
label="Journalistic Tone",
choices=["Formal", "Neutral", "Investigative", "Narrative"],
value="Neutral"
)
with gr.Column(scale=2):
output_news = gr.Textbox(
label="Generated Article",
lines=18,
interactive=False
)
generate_btn = gr.Button("Generate Article", variant="primary")
status = gr.Textbox(label="Status", interactive=False)
def process_and_generate(
main_input: str,
additional_data: str,
document: Optional[str],
audio: Optional[str],
length: int,
tone: str
):
try:
# Process documents if provided
doc_content = ""
if document:
doc_content = read_document(document)
# Process audio if provided
audio_content = ""
if audio:
audio_content = generator.transcribe_audio(audio)
# Build prompt
prompt = f"""
Topic: {main_input}
Data: {additional_data}
Tone: {tone}
Document Content: {doc_content[:500]}
Audio Content: {audio_content[:300]}
Length: {length} words
"""
            # The slider counts words; passing it straight through treats one
            # token per word as a rough generation budget
            return generator.generate_news(prompt, length), "✅ Generation successful"
except Exception as e:
logger.error(str(e))
return f"Error: {str(e)}", "❌ Generation error"
generate_btn.click(
fn=process_and_generate,
inputs=[
main_input,
additional_data,
doc_upload,
audio_upload,
length_slider,
tone_select
],
outputs=[output_news, status]
)
if __name__ == "__main__":
demo.queue()
demo.launch(
server_name="0.0.0.0",
server_port=7860
)