File size: 3,777 Bytes
416840c
be89ec7
f0db570
773cc27
7263dd8
ecb2850
7263dd8
416840c
be89ec7
db3e1e6
 
 
be89ec7
db3e1e6
 
 
 
be89ec7
 
 
 
db3e1e6
be89ec7
db3e1e6
 
 
 
 
 
 
3e62a46
db3e1e6
773cc27
db3e1e6
773cc27
 
 
 
 
 
 
 
 
db3e1e6
773cc27
db3e1e6
773cc27
 
 
 
 
7263dd8
db3e1e6
 
 
 
 
 
773cc27
db3e1e6
 
 
 
773cc27
db3e1e6
 
 
 
773cc27
 
 
 
db3e1e6
 
 
 
773cc27
db3e1e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0db570
db3e1e6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import gradio as gr
from utils import generate_script, generate_audio, truncate_text, extract_text_from_url
from prompts import SYSTEM_PROMPT
from pydub import AudioSegment
import pypdf
import os
import tempfile

def _extract_pdf_text(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*."""
    reader = pypdf.PdfReader(pdf_path)
    # Guard against a page yielding no text (None/empty) so the join never fails.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _render_dialogue(dialogue):
    """Synthesize each dialogue item; return ``(audio_segments, transcript)``."""
    segments = []
    transcript_parts = []
    for item in dialogue:
        audio_file = generate_audio(item.text, item.speaker)
        try:
            segments.append(AudioSegment.from_mp3(audio_file))
        finally:
            # Always drop the per-line temp file, even when decoding fails,
            # and never let a cleanup failure mask the real error.
            try:
                os.remove(audio_file)
            except OSError:
                pass
        transcript_parts.append(f"**{item.speaker}**: {item.text}\n\n")
    return segments, "".join(transcript_parts)


def generate_podcast(file, url, tone, length):
    """Build a podcast episode from a PDF upload or a web page.

    Exactly one of *file* and *url* must be supplied. The source text is
    extracted, truncated with ``truncate_text``, turned into a script with
    ``generate_script`` and rendered line by line with ``generate_audio``;
    the per-line clips are concatenated and exported as a single MP3.

    Args:
        file: Uploaded PDF, either an object exposing the path as ``.name``
            or the path itself as a string; falsy when nothing was uploaded.
        url: Address of the page to read; blank or whitespace-only counts
            as not provided.
        tone: Tone passed through to ``generate_script``.
        length: Length option passed through to ``generate_script``.

    Returns:
        ``(mp3_path, transcript)`` on success, where *transcript* is a
        Markdown string with one ``**speaker**: text`` paragraph per line of
        dialogue. On any failure ``(None, message)`` is returned instead of
        raising, so the message can be shown in the transcript output.
    """
    try:
        # A textbox containing only spaces should not count as a URL.
        if isinstance(url, str):
            url = url.strip()

        if file and url:
            return None, "Please provide either a PDF file or a URL, not both."

        if file:
            # Accept both upload objects (path in .name) and plain path strings.
            pdf_path = getattr(file, "name", file)
            if not str(pdf_path).lower().endswith('.pdf'):
                return None, "Please upload a PDF file."
            text = _extract_pdf_text(pdf_path)
        elif url:
            text = extract_text_from_url(url)
        else:
            return None, "Please provide either a PDF file or a URL."

        # Nothing to summarise (e.g. image-only PDF or empty page): stop early
        # rather than asking the model to script an empty document.
        if not text or not str(text).strip():
            return None, "No text could be extracted from the provided source."

        truncated_text = truncate_text(text)
        if len(truncated_text) < len(text):
            print("Warning: The input text was truncated to fit within 2048 tokens.")

        script = generate_script(SYSTEM_PROMPT, truncated_text, tone, length)

        try:
            audio_segments, transcript = _render_dialogue(script.dialogue)
        except Exception as e:
            # Caught again by the outer handler, which turns it into a message.
            raise gr.Error(f"Error generating audio: {str(e)}") from e

        # sum([]) would be the int 0, which has no export(); report it clearly.
        if not audio_segments:
            return None, "The generated script contained no dialogue."

        combined_audio = sum(audio_segments)

        # Reserve a path, close the handle, then export: writing by name while
        # the handle is still open is not portable across platforms.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_audio_path = temp_audio.name
        combined_audio.export(temp_audio_path, format="mp3")

        return temp_audio_path, transcript

    except Exception as e:
        return None, f"An error occurred: {str(e)}"

# Markdown help text for the app; passed as `description` to the gr.Interface
# built below. This is runtime, user-facing text — edit wording with care.
instructions = """
# Podcast Generator

Welcome to the Podcast Generator project! This tool creates custom podcast episodes using AI-generated content.

## Features
* Generate podcast scripts from PDF content or web pages
* Convert text to speech for a natural listening experience
* Choose the tone of your podcast (Humorous, Casual, or Formal)
* Export episodes as MP3 files

## How to Use
1. Upload a PDF file OR enter a URL (content will be truncated to 2048 tokens if longer)
2. Select the desired tone:
   - Humorous: Expect jokes, puns, and playful banter
   - Casual: Colloquial language, like a conversation between college students
   - Formal: Professional podcast style with well-structured arguments
3. Choose the podcast length
4. Click "Generate" to create your podcast
5. Listen to the generated audio and review the transcript

Note: This tool uses the LLaMa 3.1 70B model for script generation and gTTS for text-to-speech conversion. The podcast features Sarah (American accent) and Maria (British accent) as hosts.
"""

# Input widgets, listed in the positional order of
# generate_podcast(file, url, tone, length).
_podcast_inputs = [
    gr.File(file_types=[".pdf"], label="Upload PDF file (optional)"),
    gr.Textbox(label="OR Enter URL"),
    gr.Radio(
        choices=["humorous", "casual", "formal"],
        value="casual",
        label="Select podcast tone",
    ),
    gr.Radio(
        choices=["Short (1-2 min)", "Medium (3-5 min)"],
        value="Medium (3-5 min)",
        label="Podcast length",
    ),
]

# Output widgets, matching generate_podcast's (audio path, transcript) return.
_podcast_outputs = [
    gr.Audio(label="Generated Podcast"),
    gr.Markdown(label="Transcript"),
]

# The web UI: one form wired straight to generate_podcast, flagging disabled.
iface = gr.Interface(
    fn=generate_podcast,
    inputs=_podcast_inputs,
    outputs=_podcast_outputs,
    title="Custom NotebookLM-type Podcast Generator (2048 token limit)",
    description=instructions,
    allow_flagging="never",
    theme=gr.themes.Soft(),
)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()