gabrielchua committed on
Commit
9db5d78
1 Parent(s): e9a914a

add configurability for tone and length

Files changed (2)
  1. app.py +38 -6
  2. utils.py +6 -6
app.py CHANGED
@@ -8,7 +8,7 @@ import os
 import time
 from pathlib import Path
 from tempfile import NamedTemporaryFile
-from typing import List, Literal, Tuple
+from typing import List, Literal, Tuple, Optional
 
 # Third-party imports
 import gradio as gr
@@ -36,15 +36,37 @@ class Dialogue(BaseModel):
     dialogue: List[DialogueItem]
 
 
-def generate_podcast(file: str) -> Tuple[str, str]:
+def generate_podcast(file: str, tone: Optional[str] = None, length: Optional[str] = None) -> Tuple[str, str]:
     """Generate the audio and transcript from the PDF."""
+    # Check if the file is a PDF
+    if not file.lower().endswith('.pdf'):
+        raise gr.Error("Please upload a PDF file.")
+
     # Read the PDF file and extract text
-    with Path(file).open("rb") as f:
-        reader = PdfReader(f)
-        text = "\n\n".join([page.extract_text() for page in reader.pages])
+    try:
+        with Path(file).open("rb") as f:
+            reader = PdfReader(f)
+            text = "\n\n".join([page.extract_text() for page in reader.pages])
+    except Exception as e:
+        raise gr.Error(f"Error reading the PDF file: {str(e)}")
+
+    # Check if the PDF has more than ~100,000 characters
+    if len(text) > 100000:
+        raise gr.Error("The PDF is too long. Please upload a PDF with fewer than ~100,000 characters.")
+
+    # Modify the system prompt based on the chosen tone and length
+    modified_system_prompt = SYSTEM_PROMPT
+    if tone:
+        modified_system_prompt += f"\n\nTONE: The tone of the podcast should be {tone}."
+    if length:
+        length_instructions = {
+            "Short (1-2 min)": "Keep the podcast brief, around 1-2 minutes long.",
+            "Medium (3-5 min)": "Aim for a moderate length, about 3-5 minutes.",
+        }
+        modified_system_prompt += f"\n\nLENGTH: {length_instructions[length]}"
 
     # Call the LLM
-    llm_output = generate_script(SYSTEM_PROMPT, text, Dialogue)
+    llm_output = generate_script(modified_system_prompt, text, Dialogue)
     logger.info(f"Generated dialogue: {llm_output}")
 
     # Process the dialogue
@@ -100,6 +122,16 @@ demo = gr.Interface(
             label="PDF",
             file_types=[".pdf", "file/*"],
         ),
+        gr.Radio(
+            choices=["Fun", "Formal"],
+            label="Tone of the podcast",
+            value="Fun"
+        ),
+        gr.Radio(
+            choices=["Short (1-2 min)", "Medium (3-5 min)"],
+            label="Length of the podcast",
+            value="Medium (3-5 min)"
+        ),
     ],
     outputs=[
         gr.Audio(label="Audio", format="mp3"),
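
For reference, the tone/length handling added to generate_podcast can be exercised on its own. The sketch below is a minimal standalone reproduction of the prompt-composition logic in this diff; the placeholder SYSTEM_PROMPT string and the build_prompt helper name are assumptions for illustration, not part of the app.

```python
# Minimal standalone sketch of the prompt composition added in this commit.
# SYSTEM_PROMPT is a placeholder string here; in app.py it is imported from
# the project's prompt module and is not part of this diff. build_prompt is
# a hypothetical helper used only for illustration.
from typing import Optional

SYSTEM_PROMPT = "You are producing a two-host podcast script from a document."  # placeholder

def build_prompt(tone: Optional[str] = None, length: Optional[str] = None) -> str:
    """Mirror the tone/length handling inside generate_podcast."""
    prompt = SYSTEM_PROMPT
    if tone:
        prompt += f"\n\nTONE: The tone of the podcast should be {tone}."
    if length:
        length_instructions = {
            "Short (1-2 min)": "Keep the podcast brief, around 1-2 minutes long.",
            "Medium (3-5 min)": "Aim for a moderate length, about 3-5 minutes.",
        }
        prompt += f"\n\nLENGTH: {length_instructions[length]}"
    return prompt

if __name__ == "__main__":
    # The gr.Radio values flow straight into these keyword arguments.
    print(build_prompt(tone="Fun", length="Short (1-2 min)"))
```

Note that length is looked up directly in length_instructions, so only the two values offered by the Radio input are valid; an unexpected value would raise a KeyError.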
utils.py CHANGED
@@ -23,19 +23,19 @@ client = OpenAI(
 hf_client = Client("mrfakename/MeloTTS")
 
 
-def generate_script(system_prompt: str, text: str, dialogue_format):
+def generate_script(system_prompt: str, input_text: str, output_model):
     """Get the dialogue from the LLM."""
     # Load as python object
     try:
-        response = call_llm(system_prompt, text, dialogue_format)
-        dialogue = dialogue_format.model_validate_json(
+        response = call_llm(system_prompt, input_text, output_model)
+        dialogue = output_model.model_validate_json(
             response.choices[0].message.content
         )
     except ValidationError as e:
         error_message = f"Failed to parse dialogue JSON: {e}"
-        system_prompt_with_error = f"{system_prompt}\n\n Please return a VALID JSON object. This was the earlier error: {error_message}"
-        response = call_llm(system_prompt_with_error, text, dialogue_format)
-        dialogue = dialogue_format.model_validate_json(
+        system_prompt_with_error = f"{system_prompt}\n\nPlease return a VALID JSON object. This was the earlier error: {error_message}"
+        response = call_llm(system_prompt_with_error, input_text, output_model)
+        dialogue = output_model.model_validate_json(
             response.choices[0].message.content
         )
     return dialogue
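
The renaming in utils.py (text to input_text, dialogue_format to output_model) does not change behaviour: generate_script still validates the LLM response against a Pydantic model and retries once with the validation error appended to the system prompt. Below is a self-contained sketch of that parse-and-retry pattern with call_llm stubbed out; the stub, the _Response helpers, and the toy Dialogue model are illustrative assumptions, while the real call_llm in utils.py goes through the OpenAI client.

```python
# Self-contained sketch of the parse-and-retry pattern in generate_script.
# Requires pydantic v2 (model_validate_json). The stubbed call_llm and the
# _Response/_Choice/_Message helpers are illustrative only.
from pydantic import BaseModel, ValidationError


class Dialogue(BaseModel):  # toy stand-in for the app's Dialogue model
    text: str


class _Message:
    def __init__(self, content: str):
        self.content = content


class _Choice:
    def __init__(self, content: str):
        self.message = _Message(content)


class _Response:
    def __init__(self, content: str):
        self.choices = [_Choice(content)]


def call_llm(system_prompt: str, input_text: str, output_model) -> _Response:
    # Stub: return malformed output first, valid JSON once the retry prompt is used.
    if "VALID JSON" in system_prompt:
        return _Response('{"text": "Welcome to the show."}')
    return _Response("not valid json")


def generate_script(system_prompt: str, input_text: str, output_model):
    """Get the dialogue from the LLM, retrying once on a validation failure."""
    try:
        response = call_llm(system_prompt, input_text, output_model)
        dialogue = output_model.model_validate_json(response.choices[0].message.content)
    except ValidationError as e:
        error_message = f"Failed to parse dialogue JSON: {e}"
        system_prompt_with_error = (
            f"{system_prompt}\n\nPlease return a VALID JSON object. "
            f"This was the earlier error: {error_message}"
        )
        response = call_llm(system_prompt_with_error, input_text, output_model)
        dialogue = output_model.model_validate_json(response.choices[0].message.content)
    return dialogue


print(generate_script("You write podcast dialogue.", "extracted pdf text", Dialogue))
# -> text='Welcome to the show.'
```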