emirhanbilgic committed on
Commit
310b1cd
·
verified ·
1 Parent(s): d706b06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -15
app.py CHANGED
@@ -7,8 +7,7 @@ from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
7
  from PyPDF2 import PdfReader
8
  import re
9
  import textwrap
10
- import soundfile as SF
11
- import numpy as np
12
 
13
  # Device configuration
14
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -79,7 +78,6 @@ def generate_single_wav_from_text(sentence, description):
79
  audio_arr = generation.cpu().numpy().squeeze()
80
  return SAMPLE_RATE, audio_arr
81
 
82
-
83
  # Gradio Interface
84
  with gr.Blocks() as demo:
85
  with gr.Row():
@@ -92,33 +90,58 @@ with gr.Blocks() as demo:
92
  value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
93
  run_button = gr.Button("Generate Audio", variant="primary")
94
  with gr.Column():
95
- audio_container = gr.Column()
96
  markdown_output = gr.Markdown()
97
 
 
 
 
 
 
 
98
  def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
 
 
99
  text = pdf_to_text(pdf_input.name)
 
 
 
100
  if translate_checkbox:
 
101
  text = translate(text, source_lang, target_lang)
 
102
 
103
  sentences = split_text_into_sentences(text)
104
- all_audio_data = []
105
  all_text = ""
106
-
107
  for sentence in sentences:
 
108
  sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
109
- audio_data = (sample_rate, audio_arr)
110
- all_audio_data.append(audio_data)
111
  all_text += f"**Sentence**: {sentence}\n\n"
112
- yield all_audio_data, all_text
 
 
 
 
113
 
 
114
  def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
115
- audio_container.clear_components() # Clear previous components
116
- for audio_data_list, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
117
- for sample_rate, audio_arr in audio_data_list:
118
- audio_container.append(gr.Audio(value=(np.array(audio_arr).astype(np.float32), sample_rate)))
119
- yield None, markdown_text
 
 
 
 
120
 
121
- run_button.click(run_pipeline, inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description], outputs=[audio_container, markdown_output])
 
 
122
 
123
  demo.queue()
124
  demo.launch(share=True)
 
7
  from PyPDF2 import PdfReader
8
  import re
9
  import textwrap
10
+ import soundfile as sf
 
11
 
12
  # Device configuration
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
78
  audio_arr = generation.cpu().numpy().squeeze()
79
  return SAMPLE_RATE, audio_arr
80
 
 
81
  # Gradio Interface
82
  with gr.Blocks() as demo:
83
  with gr.Row():
 
90
  value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
91
  run_button = gr.Button("Generate Audio", variant="primary")
92
  with gr.Column():
93
+ audio_output = gr.Audio(label="Generated Audio")
94
  markdown_output = gr.Markdown()
95
 
96
# Helper function to combine audio arrays
def combine_audio_arrays(audio_list):
    """Concatenate per-sentence waveform arrays into one contiguous 1-D waveform.

    Parameters
    ----------
    audio_list : list of numpy arrays (one per synthesized sentence).

    Returns
    -------
    A single numpy array: all chunks joined along axis 0, in order.
    """
    # BUG FIX: this commit removed the module-level `import numpy as np`
    # while still using `np` here, which raises NameError at runtime.
    # Import locally so the helper is self-contained.
    import numpy as np

    combined_audio = np.concatenate(audio_list, axis=0)
    return combined_audio
100
+
101
# Generator pipeline: PDF text -> (optional) translation -> per-sentence TTS.
# After each sentence it yields the cumulative result so the UI updates live.
def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
    """Yield (sample_rate, combined_audio, markdown_transcript) after every sentence.

    The audio grows as sentences are synthesized: each yield carries ALL
    audio generated so far, concatenated, plus a markdown list of the
    sentences processed so far.
    """
    # Pull the raw text out of the uploaded PDF.
    print("Extracting text from PDF...")
    text = pdf_to_text(pdf_input.name)
    print(f"Extracted text: {text[:100]}...")  # short preview only

    # Optionally run the text through the translation model first.
    if translate_checkbox:
        print("Translating text...")
        text = translate(text, source_lang, target_lang)
        print(f"Translated text: {text[:100]}...")  # short preview only

    audio_chunks = []   # per-sentence waveforms, in order
    transcript = ""     # markdown accumulated across sentences
    for sentence in split_text_into_sentences(text):
        print(f"Processing sentence: {sentence[:50]}...")  # short preview only
        sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
        audio_chunks.append(audio_arr)
        # Re-combine everything so far so each yield is a complete waveform.
        combined = combine_audio_arrays(audio_chunks)
        transcript += f"**Sentence**: {sentence}\n\n"
        # Stream the cumulative state to the caller.
        yield sample_rate, combined, transcript

    print("Processing complete.")
129
 
130
# Adapter between handle_process's stream and the Gradio outputs: gr.Audio
# expects a (sample_rate, waveform) tuple, markdown passes through unchanged.
def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
    stream = handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description)
    for rate, waveform, markdown_text in stream:
        yield (rate, waveform), markdown_text
135
+
136
def handle_translation_toggle(translate_checkbox):
    """Show or hide the language dropdowns depending on the checkbox state."""
    # Truthy checkbox -> visible dropdowns; falsy -> hidden. Same visibility
    # update is applied to both the source and target language components.
    visible = True if translate_checkbox else False
    return gr.update(visible=visible), gr.update(visible=visible)
141
 
142
# Wire UI events inside the Blocks context.
# Toggling translation shows/hides the language dropdowns.
translate_checkbox.change(
    fn=handle_translation_toggle,
    inputs=translate_checkbox,
    outputs=[source_lang, target_lang],
)
# Valid target languages for each source language; unknown sources get none.
lang_targets = {"en": ["de", "fr", "tr"], "tr": ["en"], "de": ["en", "fr"], "fr": ["en", "de"]}
source_lang.change(
    fn=lambda lang: gr.update(choices=lang_targets.get(lang, [])),
    inputs=source_lang,
    outputs=target_lang,
)
# Main action: stream (audio, markdown) updates into the output components.
run_button.click(
    run_pipeline,
    inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description],
    outputs=[audio_output, markdown_output],
)
145
 
146
# Enable request queuing — NOTE(review): Gradio requires the queue for
# generator (streaming) outputs like run_pipeline; confirm against the
# installed Gradio version's docs.
demo.queue()
# Start the app; share=True additionally exposes a public share link.
demo.launch(share=True)