"""Gradio app that turns a PDF/DOCX upload or a web page into a short podcast.

Pipeline: extract text -> ask a Groq-hosted LLM for a two-speaker script ->
synthesize each speaker line with gTTS -> concatenate and trim with pydub.
"""

import io
import os
import tempfile

import docx
import gradio as gr
import PyPDF2
import requests
from bs4 import BeautifulSoup
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment

# Initialize Groq client
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Chat model used for script generation; overridable without a code change.
# NOTE(review): confirm this default model id is still served by Groq.
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama-3.1-70b-versatile")

# Hard cap on the generated audio: 5 minutes, in milliseconds (pydub slices
# AudioSegment objects by millisecond).
MAX_PODCAST_MS = 5 * 60 * 1000

# Seconds to wait for a remote page before giving up.
REQUEST_TIMEOUT_S = 30

# Speaker prefix in the script -> gTTS top-level domain (selects the accent).
_SPEAKER_VOICES = {"Man:": "co.uk", "Woman:": "com.au"}


def _is_url(value):
    """Return True if *value* is a string that starts with http:// or https://."""
    if not isinstance(value, str):
        return False
    return value.strip().lower().startswith(("http://", "https://"))


def _extract_url_text(url):
    """Download *url* and return the visible text of the HTML page."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on 4xx/5xx instead of narrating an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    # Script and style bodies are not readable content.
    for tag in soup(["script", "style"]):
        tag.decompose()
    return soup.get_text(separator=" ", strip=True)


def _extract_file_text(path):
    """Return the text of the .pdf or .docx file at *path*, or "" otherwise."""
    if not os.path.isfile(path):
        return ""
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        reader = PyPDF2.PdfReader(path)
        # extract_text() may yield None for pages without a text layer.
        return " ".join(page.extract_text() or "" for page in reader.pages)
    if lowered.endswith(".docx"):
        document = docx.Document(path)
        return " ".join(para.text for para in document.paragraphs)
    return ""


def extract_text(file_or_url):
    """Extract plain text from a URL, a file path, or an uploaded file object.

    Args:
        file_or_url: One of
            * an ``http://`` / ``https://`` URL string,
            * a path string to a local ``.pdf`` / ``.docx`` file,
            * an object with a ``name`` attribute holding such a path
              (the original code read ``.name`` from the uploaded file),
            * ``None``.

    Returns:
        The extracted text, or ``""`` when the input is empty, missing, or of
        an unsupported type.

    Raises:
        requests.RequestException: If the URL cannot be fetched or the server
            answers with an error status.
    """
    if file_or_url is None:
        return ""

    if isinstance(file_or_url, str):
        source = file_or_url.strip()
        if not source:
            return ""
        if _is_url(source):
            return _extract_url_text(source)
        # A non-URL string is treated as a local file path.
        return _extract_file_text(source)

    path = getattr(file_or_url, "name", None)
    return _extract_file_text(path) if path else ""


def generate_podcast_script(text):
    """Ask the Groq chat model for a two-speaker podcast script about *text*.

    The prompt requests "Man:" / "Woman:" line prefixes because
    ``text_to_speech`` only voices lines that start with one of them.

    Args:
        text: Source material the hosts should discuss.

    Returns:
        The script text from the first completion choice.
    """
    prompt = f"""Generate a podcast script between a man and a woman discussing the following text:

{text}

The podcast should be informative and engaging, with a natural conversation flow.
Limit the script to approximately 750 words to fit within a 5-minute podcast.
Start every line of dialogue with "Man:" or "Woman:"."""

    response = groq_client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are an AI assistant that generates podcast scripts based on given text.",
            },
            {"role": "user", "content": prompt},
        ],
        model=GROQ_MODEL,
        max_tokens=1000,
        temperature=0.7,
    )
    return response.choices[0].message.content


def text_to_speech(script):
    """Render the "Man:" / "Woman:" lines of *script* as one MP3.

    Lines without a recognised speaker prefix are skipped. Leading whitespace
    and markdown asterisks (e.g. ``**Man:**``) are tolerated.

    Args:
        script: Script text with one speaker line per text line.

    Returns:
        An ``io.BytesIO`` positioned at 0 holding MP3 data trimmed to
        ``MAX_PODCAST_MS``, or ``None`` when the script has no speakable lines.
    """
    segments = []
    for raw_line in script.splitlines():
        line = raw_line.strip().lstrip("*").lstrip()
        for prefix, tld in _SPEAKER_VOICES.items():
            if line.startswith(prefix):
                speech = line[len(prefix):].strip("* \t")
                break
        else:
            continue
        if not speech:
            # gTTS refuses empty text, so skip bare prefixes.
            continue

        clip = io.BytesIO()
        gTTS(speech, lang="en", tld=tld).write_to_fp(clip)
        clip.seek(0)
        segments.append(AudioSegment.from_mp3(clip))

    if not segments:
        return None

    # Explicit start value avoids relying on ``0 + AudioSegment``.
    final_audio = sum(segments[1:], segments[0])
    final_audio = final_audio[:MAX_PODCAST_MS]

    buffer = io.BytesIO()
    final_audio.export(buffer, format="mp3")
    buffer.seek(0)
    return buffer


def generate_podcast(file_or_url, url=None):
    """Gradio callback: build the podcast audio and script for the given input.

    Args:
        file_or_url: The uploaded file (or, for backward compatibility, a URL
            string). Takes precedence when provided.
        url: Value of the URL textbox. Only used when no file was supplied and
            only when it is an http(s) URL, so the textbox cannot be used to
            read arbitrary local files.

    Returns:
        A ``(audio_path, message)`` tuple. ``audio_path`` is the path of a
        temporary ``.mp3`` file, or ``None`` when no audio was produced;
        ``message`` is the script or an error description.
    """
    source = file_or_url
    if source is None or (isinstance(source, str) and not source.strip()):
        source = url.strip() if _is_url(url) else None

    try:
        text = extract_text(source)
    except requests.RequestException as exc:
        return None, f"Failed to fetch URL: {exc}"

    if not text or not text.strip():
        return None, "Failed to extract text. Please check your input."

    script = generate_podcast_script(text)
    audio_buffer = text_to_speech(script)
    if audio_buffer is None:
        # Nothing could be voiced; still show the script to the user.
        return None, script

    # gr.Audio accepts a file path as output, so persist the MP3 bytes.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(audio_buffer.getvalue())
    return tmp.name, script


iface = gr.Interface(
    fn=generate_podcast,
    inputs=[
        gr.File(label="Upload PDF/DOC file"),
        gr.Textbox(label="Or enter URL"),
    ],
    outputs=[
        gr.Audio(label="Generated Podcast"),
        gr.Textbox(label="Podcast Script"),
    ],
    title="Custom NotebookLM-type Podcast Generator (LLaMa 3.1 70B)",
)

iface.launch()