emirhanbilgic committed on
Commit
310b1cd
·
verified ·
1 Parent(s): d706b06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -15
app.py CHANGED
@@ -7,8 +7,7 @@ from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
7
  from PyPDF2 import PdfReader
8
  import re
9
  import textwrap
10
- import soundfile as SF
11
- import numpy as np
12
 
13
  # Device configuration
14
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -79,7 +78,6 @@ def generate_single_wav_from_text(sentence, description):
79
  audio_arr = generation.cpu().numpy().squeeze()
80
  return SAMPLE_RATE, audio_arr
81
 
82
-
83
  # Gradio Interface
84
  with gr.Blocks() as demo:
85
  with gr.Row():
@@ -92,33 +90,58 @@ with gr.Blocks() as demo:
92
  value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
93
  run_button = gr.Button("Generate Audio", variant="primary")
94
  with gr.Column():
95
- audio_container = gr.Column()
96
  markdown_output = gr.Markdown()
97
 
 
 
 
 
 
 
98
  def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
 
 
99
  text = pdf_to_text(pdf_input.name)
 
 
 
100
  if translate_checkbox:
 
101
  text = translate(text, source_lang, target_lang)
 
102
 
103
  sentences = split_text_into_sentences(text)
104
- all_audio_data = []
105
  all_text = ""
106
-
107
  for sentence in sentences:
 
108
  sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
109
- audio_data = (sample_rate, audio_arr)
110
- all_audio_data.append(audio_data)
111
  all_text += f"**Sentence**: {sentence}\n\n"
112
- yield all_audio_data, all_text
 
 
 
 
113
 
 
114
  def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
115
- audio_container.clear_components() # Clear previous components
116
- for audio_data_list, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
117
- for sample_rate, audio_arr in audio_data_list:
118
- audio_container.append(gr.Audio(value=(np.array(audio_arr).astype(np.float32), sample_rate)))
119
- yield None, markdown_text
 
 
 
 
120
 
121
- run_button.click(run_pipeline, inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description], outputs=[audio_container, markdown_output])
 
 
122
 
123
  demo.queue()
124
  demo.launch(share=True)
 
7
  from PyPDF2 import PdfReader
8
  import re
9
  import textwrap
10
+ import soundfile as sf
 
11
 
12
  # Device configuration
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
78
  audio_arr = generation.cpu().numpy().squeeze()
79
  return SAMPLE_RATE, audio_arr
80
 
 
81
  # Gradio Interface
82
  with gr.Blocks() as demo:
83
  with gr.Row():
 
90
  value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
91
  run_button = gr.Button("Generate Audio", variant="primary")
92
  with gr.Column():
93
+ audio_output = gr.Audio(label="Generated Audio")
94
  markdown_output = gr.Markdown()
95
 
96
# Helper function to combine audio arrays
def combine_audio_arrays(audio_list):
    """Concatenate per-sentence waveform arrays into one contiguous 1-D waveform.

    Parameters
    ----------
    audio_list : list of numpy arrays (one per synthesized sentence).

    Returns
    -------
    A single numpy array: all chunks joined along axis 0, in order.
    """
    # BUG FIX: this commit removed the module-level `import numpy as np`
    # while still using `np` here, which raises NameError at runtime.
    # Import locally so the helper is self-contained.
    import numpy as np

    combined_audio = np.concatenate(audio_list, axis=0)
    return combined_audio
100
+
101
# Generator pipeline: PDF text -> (optional) translation -> per-sentence TTS.
# After each sentence it yields the cumulative result so the UI updates live.
def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
    """Yield (sample_rate, combined_audio, markdown_transcript) after every sentence.

    The audio grows as sentences are synthesized: each yield carries ALL
    audio generated so far, concatenated, plus a markdown list of the
    sentences processed so far.
    """
    # Pull the raw text out of the uploaded PDF.
    print("Extracting text from PDF...")
    text = pdf_to_text(pdf_input.name)
    print(f"Extracted text: {text[:100]}...")  # short preview only

    # Optionally run the text through the translation model first.
    if translate_checkbox:
        print("Translating text...")
        text = translate(text, source_lang, target_lang)
        print(f"Translated text: {text[:100]}...")  # short preview only

    audio_chunks = []   # per-sentence waveforms, in order
    transcript = ""     # markdown accumulated across sentences
    for sentence in split_text_into_sentences(text):
        print(f"Processing sentence: {sentence[:50]}...")  # short preview only
        sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
        audio_chunks.append(audio_arr)
        # Re-combine everything so far so each yield is a complete waveform.
        combined = combine_audio_arrays(audio_chunks)
        transcript += f"**Sentence**: {sentence}\n\n"
        # Stream the cumulative state to the caller.
        yield sample_rate, combined, transcript

    print("Processing complete.")
129
 
130
# Adapter between handle_process's stream and the Gradio outputs: gr.Audio
# expects a (sample_rate, waveform) tuple, markdown passes through unchanged.
def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
    stream = handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description)
    for rate, waveform, markdown_text in stream:
        yield (rate, waveform), markdown_text
135
+
136
def handle_translation_toggle(translate_checkbox):
    """Show or hide the language dropdowns depending on the checkbox state."""
    # Truthy checkbox -> visible dropdowns; falsy -> hidden. Same visibility
    # update is applied to both the source and target language components.
    visible = True if translate_checkbox else False
    return gr.update(visible=visible), gr.update(visible=visible)
141
 
142
# Wire UI events inside the Blocks context.
# Toggling translation shows/hides the language dropdowns.
translate_checkbox.change(
    fn=handle_translation_toggle,
    inputs=translate_checkbox,
    outputs=[source_lang, target_lang],
)
# Valid target languages for each source language; unknown sources get none.
lang_targets = {"en": ["de", "fr", "tr"], "tr": ["en"], "de": ["en", "fr"], "fr": ["en", "de"]}
source_lang.change(
    fn=lambda lang: gr.update(choices=lang_targets.get(lang, [])),
    inputs=source_lang,
    outputs=target_lang,
)
# Main action: stream (audio, markdown) updates into the output components.
run_button.click(
    run_pipeline,
    inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description],
    outputs=[audio_output, markdown_output],
)
145
 
146
# Enable request queuing — NOTE(review): Gradio requires the queue for
# generator (streaming) outputs like run_pipeline; confirm against the
# installed Gradio version's docs.
demo.queue()
# Start the app; share=True additionally exposes a public share link.
demo.launch(share=True)