# Code from killerz3/PodGen & eswardivi/Podcastify
import asyncio
import json
import os
import tempfile

# Third-party imports keep their original relative order (`spaces` ahead of `torch`).
import spaces
import httpx
import edge_tts
import torch
import gradio as gr
import gradio_client
from pydub import AudioSegment
from transformers import AutoModelForCausalLM, AutoTokenizer
from moviepy.editor import AudioFileClip, concatenate_audioclips
# Module-level configuration: the system prompt sent to the chat model (asks for
# an Alice/Bob podcast script as JSON with "title" and "content" keys), UI text,
# the CSS handed to the Gradio Blocks, and the Qwen/Qwen1.5-1.8B-Chat tokenizer
# plus a causal-LM model.
# NOTE(review): this region looks truncated in this view - the closing quotes of
# the triple-quoted strings, the closing braces of the CSS rules and the
# arguments of `AutoModelForCausalLM.from_pretrained(` are not visible. Confirm
# against the complete file before editing any of these literals.
system_prompt = '''
You are an educational podcast generator. You have to create a podcast between Alice and Bob that gives an overview of the News given by the user.
Please provide the script in the following JSON format directly and only include it:
"title": "[string]",
"content": {
"Alice_0": "[string]",
"BOB_0": "[string]",
Please note that the text you generate now must be based on the tone of people's daily life.
And the punctuation marks only include commas and periods.
A podcast talking about the link's content you provided.
🔎 Paste a website link with http/https.
🦕 Generate podcast.
css = """
h1 {
text-align: center;
display: block;
footer {
display:none !important
model = AutoModelForCausalLM.from_pretrained(
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-1.8B-Chat")
def validate_url(url):
    """Fetch ``url`` over HTTP and return the response body as text.

    Failures are reported in-band: the function never raises, it returns a
    human-readable message string describing what went wrong instead.

    Args:
        url: Absolute URL to request (60 second timeout).

    Returns:
        The response text on success. On failure, a message starting with
        "An error occurred while requesting", "Error response" or
        "An unexpected error occurred".
    """
    try:
        response = httpx.get(url, timeout=60.0)
        # httpx does not raise for unsuccessful status codes on its own; without
        # this call the HTTPStatusError handler below can never run and an
        # error page body would be returned as if it were the article text.
        response.raise_for_status()
        return response.text
    except httpx.RequestError as e:
        # Transport-level problems: DNS failure, refused connection, timeout...
        return f"An error occurred while requesting {url}: {e}"
    except httpx.HTTPStatusError as e:
        return f"Error response {e.response.status_code} while requesting {url}"
    except Exception as e:
        # Last-resort boundary so callers always get a string back.
        return f"An unexpected error occurred: {e}"
def fetch_text(url):
    """Return the text of the web page at ``url`` via the r.jina.ai reader proxy.

    The target URL is appended to ``https://r.jina.ai/`` and the combined URL is
    fetched with ``validate_url``.

    Args:
        url: The page URL, including its ``http://`` / ``https://`` scheme.

    Returns:
        Whatever ``validate_url`` returns for the proxied URL: the page text on
        success, or an error message string on failure.
    """
    print("Entered Webpage Extraction")
    prefix_url = "https://r.jina.ai/"
    page_text = validate_url(prefix_url + url)
    # Log the exit only after the request has actually completed, so the two
    # log lines bracket the extraction.
    print("Exited Webpage Extraction")
    return page_text
async def text_to_speech(text, voice, filename):
    """Synthesize ``text`` with the Edge TTS ``voice`` and save the audio to ``filename``."""
    await edge_tts.Communicate(text, voice).save(filename)
async def gen_show(script):
    """Synthesize every dialogue line of ``script`` and join them into one MP3.

    Each entry of ``script['content']`` is keyed ``"<speaker>_<index>"``. Every
    line is rendered to its own temporary MP3 (all lines concurrently), the
    clips are concatenated in the order of the mapping, and the per-line files
    are removed afterwards.

    Args:
        script: Mapping with a ``'content'`` mapping of dialogue key to text.

    Returns:
        Path of the combined MP3 file. The caller owns (and should eventually
        delete) this file.

    Raises:
        KeyError: If ``script`` has no ``'content'`` entry.
        ValueError: If ``'content'`` is empty.
    """
    content = script['content']
    if not content:
        # Concatenating zero clips cannot produce audio; fail with a clear message.
        raise ValueError("Script contains no dialogue lines.")

    temp_files = []
    tasks = []
    audio_clips = []
    try:
        for key, text in content.items():
            # "Alice_0" -> speaker "Alice", index "0".
            speaker, _, index = key.partition('_')
            # Compare case-insensitively so "alice"/"ALICE" keep the same voice.
            is_alice = speaker.strip().lower() == "alice"
            voice = "en-US-JennyNeural" if is_alice else "en-US-GuyNeural"
            # Reserve a path and close the handle right away: only the name is
            # needed, and an open handle would leak a file descriptor.
            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as handle:
                filename = handle.name
            # Track the path so it is loaded below and removed in `finally`.
            temp_files.append(filename)
            tasks.append(text_to_speech(text, voice, filename))
            print(f"Generated audio for {speaker}_{index}: {filename}")

        await asyncio.gather(*tasks)

        # Combine the audio files using moviepy
        for path in temp_files:
            audio_clips.append(AudioFileClip(path))
        combined = concatenate_audioclips(audio_clips)

        # Create temporary file for the combined output
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as handle:
            output_filename = handle.name

        # Save the combined file
        combined.write_audiofile(output_filename)
        print(f"Combined audio saved as: {output_filename}")
    finally:
        # Release the clip readers before deleting the files they point at.
        for clip in audio_clips:
            clip.close()
        # Clean up temporary files, also when synthesis or mixing failed.
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except OSError:
                continue
            print(f"Deleted temporary file: {temp_file}")

    return output_filename
# Run the chat model and return the decoded reply text as a string.
# NOTE(review): the argument lists of `tokenizer.apply_chat_template(` and
# `model.generate(`, and the closing bracket of the list comprehension, are not
# visible in this view, so the prompt-formatting and generation settings cannot
# be documented from here - confirm against the complete file.
def generator(messages):
# Build the prompt through the tokenizer's chat template (presumably from `messages`).
answer = tokenizer.apply_chat_template(
# Tokenize the rendered prompt into PyTorch tensors and move them to device 0.
model_inputs = tokenizer([answer], return_tensors="pt").to(0)
generated_ids = model.generate(
# Keep only the newly generated tokens: drop the first len(input_ids) entries
# (the prompt) from every output sequence.
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
# Decode the batch without special tokens and take the first sequence.
results = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
return results
async def main(link):
    """Turn the web page at ``link`` into a podcast and return the audio path.

    Pipeline: validate the URL scheme, fetch the page text, ask the chat model
    for a JSON podcast script, then synthesize that script to a single MP3.

    Args:
        link: URL typed by the user; must start with ``http://`` or ``https://``.

    Returns:
        Path of the generated MP3 file.

    Raises:
        gr.Error: If the URL scheme is invalid or the page could not be
            fetched. Raised instead of returning a ``(message, None)`` tuple,
            because this handler feeds a single file-path audio output and a
            tuple is not a valid value for it.
        ValueError: If the model output is empty or is not valid JSON.
    """
    link = (link or "").strip()
    if not link.startswith(("http://", "https://")):
        raise gr.Error("URL must start with 'http://' or 'https://'")

    text = fetch_text(link)
    # The fetch helper reports failures as message strings. Match their known
    # prefixes rather than searching for "Error" anywhere in the text: that
    # search misses the lowercase "An error occurred..." messages and rejects
    # any ordinary page that merely contains the word "Error".
    fetch_failure_prefixes = (
        "An error occurred while requesting",
        "Error response",
        "An unexpected error occurred",
    )
    if text.startswith(fetch_failure_prefixes):
        raise gr.Error(text)

    prompt = f"News: {text}, json:"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    generated_script = generator(messages)
    print(f"Generated Script:{generated_script}")

    # Check if the generated_script is empty or not valid JSON
    if not generated_script or not generated_script.strip().startswith('{'):
        raise ValueError("Failed to generate a valid script.")
    try:
        script_json = json.loads(generated_script)  # Use the generated script as input
    except json.JSONDecodeError as err:
        # Starting with "{" does not guarantee well-formed JSON; report it with
        # the same error as the check above.
        raise ValueError("Failed to generate a valid script.") from err

    output_filename = await gen_show(script_json)
    print(f"Output File:{output_filename}")
    return output_filename
# Build and launch the Gradio UI: an audio player for the result, a textbox for
# the link, and Send / Clear buttons. Clicking Send runs `main` on the textbox
# value and routes its return value to the audio player.
# NOTE(review): indentation is not visible in this view, so the exact nesting of
# the Accordion / Row containers cannot be confirmed from here.
with gr.Blocks(theme='soft', css=css, title="Musen") as iface:
with gr.Accordion(""):
with gr.Row():
output_box = gr.Audio(label="Podcast", type="filepath", interactive=False, autoplay=True, elem_classes="audio") # Read-only, autoplaying audio player that receives a file path
with gr.Row():
input_box = gr.Textbox(label="Link", placeholder="Enter a http link")
with gr.Row():
submit_btn = gr.Button("🚀 Send") # Create a submit button
clear_btn = gr.ClearButton(output_box, value="🗑️ Clear") # Button that clears the audio output
# Set up the event listeners
submit_btn.click(main, inputs=input_box, outputs=output_box)
iface.queue().launch(show_api=False) # Enable request queuing and launch the Gradio interface with the API page hidden