Spaces:

siddhartharya
/

My_NotebookLM_Podcast_Generator

Running

App Files Files Community

siddhartharya committed on 18 days ago

Commit

be89ec7

•

1 Parent(s): b5389b5

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -21

app.py CHANGED Viewed

@@ -1,36 +1,43 @@
 import gradio as gr
-from utils import generate_script, generate_audio, truncate_text
 from prompts import SYSTEM_PROMPT
 from pydub import AudioSegment
 import pypdf
 import os
 import tempfile
-def generate_podcast(file, tone, length):
-    # Extract text from PDF
-    if not file.name.lower().endswith('.pdf'):
-        raise gr.Error("Please upload a PDF file.")
-    try:
-        pdf_reader = pypdf.PdfReader(file.name)
-        text = ""
-        for page in pdf_reader.pages:
-            text += page.extract_text()
-    except Exception as e:
-        raise gr.Error(f"Error reading the PDF file: {str(e)}")
-    # Truncate text to 2048 tokens
     truncated_text = truncate_text(text)
     if len(truncated_text) < len(text):
         print("Warning: The input text was truncated to fit within 2048 tokens.")
-    # Generate script
     try:
         script = generate_script(SYSTEM_PROMPT, truncated_text, tone, length)
     except Exception as e:
         raise gr.Error(f"Error generating script: {str(e)}")
-    # Generate audio for each dialogue item
     audio_segments = []
     transcript = ""
     try:
@@ -43,30 +50,27 @@ def generate_podcast(file, tone, length):
     except Exception as e:
         raise gr.Error(f"Error generating audio: {str(e)}")
-    # Combine audio segments
     combined_audio = sum(audio_segments)
-    # Save combined audio to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         combined_audio.export(temp_audio.name, format="mp3")
         temp_audio_path = temp_audio.name
     return temp_audio_path, transcript
-# Gradio interface setup
 instructions = """
 # Podcast Generator
 Welcome to the Podcast Generator project! This tool allows you to create custom podcast episodes using AI-generated content.
 ## Features
-* Generate podcast scripts from PDF content
 * Convert text to speech for a natural listening experience
 * Choose the tone of your podcast
 * Export episodes as MP3 files
 ## How to Use
-1. Upload a PDF file (content will be truncated to 2048 tokens if longer)
 2. Select the desired tone (humorous, casual, formal)
 3. Choose the podcast length
 4. Click "Generate" to create your podcast
@@ -78,7 +82,8 @@ Note: This tool uses the LLaMa 3.1 70B model for script generation and gTTS for
 iface = gr.Interface(
     fn=generate_podcast,
     inputs=[
-        gr.File(label="Upload PDF file", file_types=[".pdf"]),
         gr.Radio(["humorous", "casual", "formal"], label="Select podcast tone", value="casual"),
         gr.Radio(["Short (1-2 min)", "Medium (3-5 min)"], label="Podcast length", value="Medium (3-5 min)")
     ],

 import gradio as gr
+from utils import generate_script, generate_audio, truncate_text, extract_text_from_url
 from prompts import SYSTEM_PROMPT
 from pydub import AudioSegment
 import pypdf
 import os
 import tempfile
+def generate_podcast(file, url, tone, length):
+    if file and url:
+        raise gr.Error("Please provide either a PDF file or a URL, not both.")
+    if file:
+        if not file.name.lower().endswith('.pdf'):
+            raise gr.Error("Please upload a PDF file.")
+        try:
+            pdf_reader = pypdf.PdfReader(file.name)
+            text = ""
+            for page in pdf_reader.pages:
+                text += page.extract_text()
+        except Exception as e:
+            raise gr.Error(f"Error reading the PDF file: {str(e)}")
+    elif url:
+        try:
+            text = extract_text_from_url(url)
+        except Exception as e:
+            raise gr.Error(f"Error extracting text from URL: {str(e)}")
+    else:
+        raise gr.Error("Please provide either a PDF file or a URL.")
     truncated_text = truncate_text(text)
     if len(truncated_text) < len(text):
         print("Warning: The input text was truncated to fit within 2048 tokens.")
     try:
         script = generate_script(SYSTEM_PROMPT, truncated_text, tone, length)
     except Exception as e:
         raise gr.Error(f"Error generating script: {str(e)}")
     audio_segments = []
     transcript = ""
     try:
     except Exception as e:
         raise gr.Error(f"Error generating audio: {str(e)}")
     combined_audio = sum(audio_segments)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         combined_audio.export(temp_audio.name, format="mp3")
         temp_audio_path = temp_audio.name
     return temp_audio_path, transcript
 instructions = """
 # Podcast Generator
 Welcome to the Podcast Generator project! This tool allows you to create custom podcast episodes using AI-generated content.
 ## Features
+* Generate podcast scripts from PDF content or web pages
 * Convert text to speech for a natural listening experience
 * Choose the tone of your podcast
 * Export episodes as MP3 files
 ## How to Use
+1. Upload a PDF file OR enter a URL (content will be truncated to 2048 tokens if longer)
 2. Select the desired tone (humorous, casual, formal)
 3. Choose the podcast length
 4. Click "Generate" to create your podcast
 iface = gr.Interface(
     fn=generate_podcast,
     inputs=[
+        gr.File(label="Upload PDF file (optional)", file_types=[".pdf"]),
+        gr.Textbox(label="OR Enter URL"),
         gr.Radio(["humorous", "casual", "formal"], label="Select podcast tone", value="casual"),
         gr.Radio(["Short (1-2 min)", "Medium (3-5 min)"], label="Podcast length", value="Medium (3-5 min)")
     ],