# Musen

#Codes from killerz3/PodGen & eswardivi/Podcastify
import asyncio
import json
import os
import tempfile

# Third-party (original relative order kept, so `spaces` is still imported before `torch`).
import spaces
import httpx
import edge_tts
import torch
import gradio as gr
import gradio_client
from pydub import AudioSegment
from transformers import AutoModelForCausalLM, AutoTokenizer
from moviepy.editor import AudioFileClip, concatenate_audioclips

# System prompt sent to the chat model. It asks for a two-speaker (Alice/Bob)
# podcast script and pins the JSON shape the rest of this file consumes:
# a "title" string plus a "content" object keyed by "<speaker>_<index>".
# Note that the example keys differ in case ("Alice_0" vs "BOB_0").
system_prompt = '''
    You are an educational podcast generator. You have to create a podcast between Alice and Bob that gives an overview of the News given by the user.
    Please provide the script in the following JSON format directly and only include it:
    {
      "title": "[string]",
      "content": {
        "Alice_0": "[string]",
        "BOB_0": "[string]",
        ...
      }
    }
    Please note that the text you generate now must be based on the tone of people's daily life.
    And the punctuation marks only include commas and periods.
'''

# HTML header rendered through gr.Markdown at the top of the page.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Musen</h1>
<p>A podcast talking about the link's content you provided.</p>
<p>🔎 Paste a website link with http/https.</p>
<p>🦕 Generate podcast. </p>
</div>
'''

# Custom CSS handed to gr.Blocks: centers <h1> headings and hides the footer.
css = """
h1 {
    text-align: center;
    display: block;
}
footer {
    display:none !important
}
"""

# Load the chat model and its tokenizer once at import time; `generator`
# reuses these module-level objects for every request.
_CHECKPOINT = "Qwen/Qwen1.5-1.8B-Chat"

model = AutoModelForCausalLM.from_pretrained(
    _CHECKPOINT, torch_dtype="auto", device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)


def validate_url(url):
    """Download *url* and return its body, or a description of what went wrong.

    This function never raises for ordinary failures: transport problems,
    non-2xx statuses and any other exception are converted into a
    human-readable message that is returned in place of the page text.
    """
    try:
        reply = httpx.get(url, timeout=60.0)
        reply.raise_for_status()
        body = reply.text
    except httpx.RequestError as exc:
        # Connection, DNS, timeout and similar transport-level failures.
        return f"An error occurred while requesting {url}: {exc!s}"
    except httpx.HTTPStatusError as exc:
        # The server answered, but with a 4xx/5xx status.
        return f"Error response {exc.response.status_code} while requesting {url}"
    except Exception as exc:
        return f"An unexpected error occurred: {exc!s}"
    return body

def fetch_text(url):
    """Fetch the content of *url* through the ``https://r.jina.ai/`` prefix.

    The target URL is appended to the prefix (presumably Jina's reader
    endpoint, which serves a text rendering of the page -- confirm) and the
    combined URL is handed to ``validate_url``.

    Returns:
        Whatever ``validate_url`` returns for the prefixed URL: the response
        text on success, or an error-message string on failure.
    """
    print("Entered Webpage Extraction")
    full_url = "https://r.jina.ai/" + url
    print(full_url)
    text = validate_url(full_url)
    # Log the exit only once the request has actually completed, so the
    # "Entered"/"Exited" pair brackets the fetch instead of preceding it.
    print("Exited Webpage Extraction")
    return text

async def text_to_speech(text, voice, filename):
    """Synthesize *text* with the edge-tts *voice* and write the audio to *filename*."""
    await edge_tts.Communicate(text, voice).save(filename)


async def gen_show(script):
    """Render a podcast script to one MP3 file and return that file's path.

    Args:
        script: Mapping with a ``"content"`` entry. Its keys look like
            ``"<speaker>_<index>"`` and its values are the lines to speak;
            clips are joined in the mapping's iteration order.

    Returns:
        Path of the combined MP3. The file is created with ``delete=False``,
        so removing it is left to the caller.

    Raises:
        KeyError: If ``script`` has no ``"content"`` entry.
        ValueError: If ``"content"`` is empty.
    """
    content = script['content']
    if not content:
        raise ValueError("Script has no dialogue to synthesize.")

    segments = []     # (dialogue key, temp file path), in playback order
    audio_clips = []
    try:
        tasks = []
        for key, text in content.items():
            # partition() tolerates keys without an underscore, where
            # split('_')[1] would raise IndexError.
            speaker = key.partition('_')[0]
            # Compare case-insensitively: the prompt's own example mixes
            # "Alice" and "BOB", so the model's casing cannot be relied on.
            is_alice = speaker.strip().casefold() == "alice"
            voice = "en-US-JennyNeural" if is_alice else "en-US-GuyNeural"

            # Only the unique path is needed; close the handle right away so
            # no descriptor stays open while edge-tts writes to the file.
            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as handle:
                filename = handle.name
            segments.append((key, filename))
            tasks.append(text_to_speech(text, voice, filename))

        # Synthesize all lines concurrently.
        await asyncio.gather(*tasks)
        for key, filename in segments:
            print(f"Generated audio for {key}: {filename}")

        # Combine the audio files using moviepy. Clips are appended one by
        # one so that every opened reader is closed below even if a later
        # file fails to load.
        for _, filename in segments:
            audio_clips.append(AudioFileClip(filename))
        combined = concatenate_audioclips(audio_clips)

        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as handle:
            output_filename = handle.name

        combined.write_audiofile(output_filename)
        print(f"Combined audio saved as: {output_filename}")
    finally:
        # Release the readers before deleting the files they point at, and
        # clean up on failure as well as on success.
        for clip in audio_clips:
            clip.close()
        for _, filename in segments:
            try:
                os.remove(filename)
            except OSError as err:
                print(f"Could not delete temporary file {filename}: {err}")
            else:
                print(f"Deleted temporary file: {filename}")

    return output_filename

@spaces.GPU(duration=100)
def generator(messages):
    """Generate the chat model's reply to *messages*.

    Args:
        messages: Conversation as a list of ``{"role": ..., "content": ...}``
            dicts, rendered with the tokenizer's chat template.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped), limited to 512 new tokens.
    """
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Follow the device the model was actually placed on (it is loaded with
    # device_map="auto") instead of hard-coding CUDA index 0.
    model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    # Unpack the whole encoding so the attention mask travels with input_ids.
    output_ids = model.generate(
        **model_inputs,
        max_new_tokens=512
    )

    # generate() returns prompt + completion; keep only the completion.
    prompt_length = model_inputs.input_ids.shape[1]
    completion_ids = output_ids[0][prompt_length:]

    return tokenizer.decode(completion_ids, skip_special_tokens=True)
    

# Leading text of every failure message produced by validate_url. Matching on
# these prefixes avoids both missing the lowercase "error" messages and
# rejecting ordinary pages that merely contain the word "Error".
_FETCH_ERROR_PREFIXES = (
    "An error occurred while requesting",
    "Error response",
    "An unexpected error occurred",
)


def _parse_script(raw):
    """Decode the JSON object embedded in the model reply *raw*.

    Text before the first ``{`` and after the last ``}`` (code fences, stray
    prose) is ignored.

    Raises:
        ValueError: If no JSON object is found, it does not decode
            (``json.JSONDecodeError`` is a ``ValueError``), or it lacks a
            non-empty ``"content"`` mapping.
    """
    raw = raw or ""
    start, end = raw.find('{'), raw.rfind('}')
    if start == -1 or end < start:
        raise ValueError("Failed to generate a valid script.")

    script = json.loads(raw[start:end + 1])
    if not isinstance(script, dict):
        raise ValueError("Failed to generate a valid script.")
    content = script.get("content")
    if not isinstance(content, dict) or not content:
        raise ValueError("Failed to generate a valid script.")

    # Not every model reply includes a title; default it so downstream code
    # can rely on the key being present.
    script.setdefault("title", "")
    return script


async def main(link):
    """Turn the page behind *link* into a podcast and return the audio path.

    Args:
        link: URL typed by the user; must start with ``http://`` or
            ``https://``.

    Returns:
        Path of the generated MP3 file.

    Raises:
        gr.Error: If the link is malformed, the page cannot be fetched, or
            the model does not produce a usable script. The click handler
            feeds a single output component, so failures are raised for
            Gradio to display rather than returned as a ``(message, None)``
            pair that does not match that single output.
    """
    link = (link or "").strip()
    if not link.startswith(("http://", "https://")):
        raise gr.Error("URL must start with 'http://' or 'https://'")

    text = fetch_text(link)
    if text.startswith(_FETCH_ERROR_PREFIXES):
        raise gr.Error(text)

    prompt = f"News: {text}, json:"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    generated_script = generator(messages)
    print(f"Generated Script:{generated_script}")

    try:
        script_json = _parse_script(generated_script)
    except ValueError as err:
        raise gr.Error("Failed to generate a valid script.") from err

    output_filename = await gen_show(script_json)
    print("Output File:" + output_filename)

    return output_filename

# Build the page top to bottom: description, audio player, link box, buttons.
with gr.Blocks(theme='soft', css=css, title="Musen") as iface:
    with gr.Accordion(""):
        gr.Markdown(DESCRIPTION)
    with gr.Row():
        output_box = gr.Audio(label="Podcast", type="filepath", interactive=False, autoplay=True, elem_classes="audio")  # Read-only audio player for the generated podcast
    with gr.Row():
        input_box = gr.Textbox(label="Link", placeholder="Enter a http link")
    with gr.Row():
        submit_btn = gr.Button("🚀 Send")  # Create a submit button
        clear_btn = gr.ClearButton(output_box, value="🗑️ Clear") # Clear button; only the audio output is registered with it

    # Set up the event listeners: the link text goes to `main`, and its
    # return value is shown in the audio player.
    submit_btn.click(main, inputs=input_box, outputs=output_box)


#gr.close_all()

# Runs at import time: enable request queuing, then start the server.
iface.queue().launch(show_api=False)  # Launch the Gradio interface