File size: 6,050 Bytes
d8ad8f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# Code adapted from killerz3/PodGen & eswardivi/Podcastify
import os
import subprocess
import sys

# Install flash-attn at startup, before the model libraries are imported.
# FLASH_ATTENTION_SKIP_CUDA_BUILD is merged into the current environment
# instead of replacing it, so PATH, HOME, proxy settings, etc. remain visible
# to pip. (Presumably the variable tells flash-attn's setup to skip compiling
# its CUDA kernels — confirm against the flash-attn build docs.)
# The exit status is deliberately not checked: the install stays best-effort.
subprocess.run(
    [sys.executable, '-m', 'pip', 'install', 'flash-attn', '--no-build-isolation'],
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    check=False,
)

import asyncio
import json
import os
import tempfile

# NOTE(review): `spaces` is kept first among the third-party imports (ahead of
# torch), as in the original ordering — confirm before re-sorting.
import spaces
import edge_tts
import gradio as gr
import gradio_client
import httpx
import torch
from moviepy.editor import AudioFileClip, concatenate_audioclips
from pydub import AudioSegment
from transformers import AutoModelForCausalLM, AutoTokenizer

# System message sent to the chat model: asks for a two-speaker (Alice/Bob)
# podcast script returned as a single JSON object with "title" and "content".
system_prompt = '''
    You are an educational podcast generator. You have to create a podcast between Alice and Bob that gives an overview of the News given by the user.
    Please provide the script in the following JSON format directly and only include it:
    {
      "title": "[string]",
      "content": {
        "Alice_0": "[string]",
        "BOB_0": "[string]",
        ...
      }
    }
    Please note that the text you generate now must be based on the tone of people's daily life.
    And the punctuation marks only include commas and periods.
'''

# HTML shown at the top of the page. The emoji were previously stored as
# mis-decoded byte sequences; they are restored to the intended characters.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Musen</h1>
<p>A podcast talking about the link's content you provided.</p>
<p>🔎 Paste a website link with http/https.</p>
<p>🦕 Generate podcast. </p>
</div>
'''

# Custom CSS handed to gr.Blocks: centers <h1> and hides the page footer.
css = """
h1 {
    text-align: center;
    display: block;
}
footer {
    display:none !important
}
"""

# Hugging Face Hub id shared by the model weights and their tokenizer.
MODEL_ID = "Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4"

# Loaded once at import time; dtype and device placement are left to "auto".
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", device_map="auto")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)


async def validate_url(url):
    """Fetch *url* and return its body text, or an error message string.

    The request is made with ``httpx.AsyncClient`` so that awaiting this
    coroutine does not block the event loop while the download is in flight
    (the previous synchronous ``httpx.get`` call did).

    Args:
        url: Absolute URL to request.

    Returns:
        The response body as text on success. On failure, one of three
        human-readable messages, beginning with
        ``"An error occurred while requesting"``, ``"Error response"`` or
        ``"An unexpected error occurred"``. No ``Exception`` is propagated to
        the caller, so callers must inspect the returned string.
    """
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(url)
            response.raise_for_status()
            return response.text
    except httpx.RequestError as e:
        # Transport-level problems: DNS, connection, timeout, ...
        return f"An error occurred while requesting {url}: {str(e)}"
    except httpx.HTTPStatusError as e:
        # Raised by raise_for_status() for 4xx/5xx responses.
        return f"Error response {e.response.status_code} while requesting {url}"
    except Exception as e:
        return f"An unexpected error occurred: {str(e)}"

async def fetch_text(url):
    """Download the content of *url* through the ``https://r.jina.ai/`` prefix.

    The target URL is appended to the prefix and the combined URL is fetched
    with ``validate_url``. (Presumably r.jina.ai returns a text rendering of
    the page — confirm against the service's documentation.)

    Args:
        url: The page URL supplied by the user.

    Returns:
        Whatever ``validate_url`` returns: the fetched text, or an error
        message string.
    """
    print("Entered Webpage Extraction")
    prefix_url = "https://r.jina.ai/"
    full_url = prefix_url + url
    print(full_url)
    # Await here so callers receive the text itself rather than a pending
    # coroutine, and so the exit message is logged after the request finishes.
    result = await validate_url(full_url)
    print("Exited Webpage Extraction")
    return result

async def text_to_speech(text, voice, filename):
    """Synthesize *text* with the edge-tts *voice* and save the audio to *filename*."""
    await edge_tts.Communicate(text, voice).save(filename)


async def gen_show(script):
    """Render a podcast script to a single MP3 file and return its path.

    Each entry of ``script['content']`` is synthesized to its own temporary
    MP3 (all entries concurrently), the clips are concatenated in dictionary
    order, and the per-line files are deleted afterwards.

    Args:
        script: Mapping with a ``'content'`` mapping of ``"<speaker>_<index>"``
            keys to the text spoken. The speaker "Alice" (any letter case)
            gets the ``en-US-JennyNeural`` voice; every other speaker gets
            ``en-US-GuyNeural``.

    Returns:
        Path of the combined MP3. The file is created with ``delete=False``;
        removing it is left to the caller.

    Raises:
        KeyError: If ``script`` has no ``'content'`` entry.
        ValueError: If ``'content'`` is empty, since there is nothing to render.
    """
    content = script['content']
    if not content:
        raise ValueError("Script contains no dialogue to synthesize.")

    temp_files = []
    audio_clips = []
    try:
        tasks = []
        for key, text in content.items():
            # partition() tolerates keys without an underscore (index == "").
            speaker, _, index = key.partition('_')
            voice = "en-US-JennyNeural" if speaker.casefold() == "alice" else "en-US-GuyNeural"

            # Only the unique path is needed; close the handle immediately so
            # no file descriptor is leaked while edge-tts writes to the path.
            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file:
                filename = temp_file.name
            temp_files.append(filename)

            tasks.append(text_to_speech(text, voice, filename))
            print(f"Queued audio for {speaker}_{index}: {filename}")

        # Run all text-to-speech requests concurrently.
        await asyncio.gather(*tasks)

        # Combine the audio files using moviepy
        audio_clips = [AudioFileClip(path) for path in temp_files]
        combined = concatenate_audioclips(audio_clips)

        # Reserve a path for the combined output
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as output_file:
            output_filename = output_file.name

        combined.write_audiofile(output_filename)
        print(f"Combined audio saved as: {output_filename}")
    finally:
        # Release the readers before deleting their files, and clean up even
        # when synthesis or encoding failed part-way through.
        for clip in audio_clips:
            clip.close()
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
                print(f"Deleted temporary file: {temp_file}")
            except OSError as err:
                print(f"Could not delete temporary file {temp_file}: {err}")

    return output_filename

@spaces.GPU(duration=100)
async def generator(link):
    """Turn the web page at *link* into a podcast audio file.

    Steps: validate the URL scheme, fetch the page text with ``fetch_text``,
    ask the chat model for a JSON podcast script, and render that script to
    audio with ``gen_show``.

    Args:
        link: URL typed by the user; must start with ``http://`` or
            ``https://``.

    Returns:
        Path of the generated MP3 file, suitable for a ``gr.Audio`` output
        of type ``"filepath"``.

    Raises:
        gr.Error: If the URL scheme is invalid or the page could not be
            fetched. The handler feeds a single Audio output, so a message
            cannot be returned as a value; raising lets Gradio display it.
        ValueError: If the model output is empty or is not valid JSON.
    """
    # str.startswith accepts a tuple, covering both schemes in one call.
    if not link or not link.startswith(("http://", "https://")):
        raise gr.Error("URL must start with 'http://' or 'https://'")

    text = await fetch_text(link)
    # If fetch_text hands back a still-pending coroutine instead of the text,
    # resolve it here so the checks below always see a string.
    if asyncio.iscoroutine(text):
        text = await text

    # validate_url reports failures as message strings with these prefixes.
    # Matching the prefixes avoids rejecting pages that merely contain the
    # word "Error" and catches the messages that spell it in lower case.
    fetch_error_prefixes = (
        "An error occurred while requesting",
        "Error response",
        "An unexpected error occurred",
    )
    if text.startswith(fetch_error_prefixes):
        raise gr.Error(text)

    prompt = f"News: {text}, json:"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    answer = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([answer], return_tensors="pt").to(0)

    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=512
    )

    # Drop the prompt tokens so only the newly generated tokens are decoded.
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    generated_script = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    print("Generated Script:"+generated_script)

    # Check if the generated_script is empty or not valid JSON
    if not generated_script or not generated_script.strip().startswith('{'):
        raise ValueError("Failed to generate a valid script.")

    try:
        script_json = json.loads(generated_script)
    except json.JSONDecodeError as err:
        # Looks like JSON but does not parse (e.g. cut off at the token limit).
        raise ValueError("Failed to generate a valid script.") from err

    output_filename = await gen_show(script_json)
    print("Output File:"+output_filename)

    return output_filename

# Build the page: description, audio player, link input and action buttons.
with gr.Blocks(theme='soft', css=css, title="Musen") as iface:
    with gr.Accordion(""):
        gr.Markdown(DESCRIPTION)
    with gr.Row():
        # Read-only audio player that receives the generated podcast file path
        output_box = gr.Audio(label="Podcast", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
    with gr.Row():
        input_box = gr.Textbox(label="Link", placeholder="Enter a http link")
    with gr.Row():
        submit_btn = gr.Button("🚀 Send")  # Create a submit button
        clear_btn = gr.ClearButton(output_box, value="🗑️ Clear")  # Clears the audio player

    # Set up the event listeners
    submit_btn.click(generator, inputs=input_box, outputs=output_box)


iface.queue().launch(show_api=False)  # Launch the Gradio interface