Spaces:

emirhanbilgic
/

read-my-pdf-outloud

Running

App Files Community

emirhanbilgic committed on Aug 11, 2024

Commit

53b808e

verified ·

1 Parent(s): e666162

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -13

app.py CHANGED Viewed

@@ -7,7 +7,8 @@ from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
 from PyPDF2 import PdfReader
 import re
 import textwrap
-import soundfile as sf
 # Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -91,7 +92,7 @@ with gr.Blocks() as demo:
                                      value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
             run_button = gr.Button("Generate Audio", variant="primary")
         with gr.Column():
-            audio_gallery = gr.Gallery(label="Generated Audios", item_type="audio")
             markdown_output = gr.Markdown()
     def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
@@ -100,25 +101,24 @@ with gr.Blocks() as demo:
             text = translate(text, source_lang, target_lang)
         sentences = split_text_into_sentences(text)
-        all_audio_paths = []
         all_text = ""
         for sentence in sentences:
             sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
-            # Create temporary audio file
-            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmpfile:
-                sf.write(tmpfile, audio_arr, sample_rate)
-                all_audio_paths.append(tmpfile.name)
             all_text += f"**Sentence**: {sentence}\n\n"
-            yield all_audio_paths, all_text
     def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
-        for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
-            yield audio_data, markdown_text
-    translate_checkbox.change(fn=handle_translation_toggle, inputs=translate_checkbox, outputs=[source_lang, target_lang])
-    source_lang.change(fn=lambda lang: gr.update(choices={"en": ["de", "fr", "tr"], "tr": ["en"], "de": ["en", "fr"], "fr": ["en", "de"]}.get(lang, [])), inputs=source_lang, outputs=target_lang)
-    run_button.click(run_pipeline, inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description], outputs=[audio_gallery, markdown_output])
 demo.queue()
 demo.launch(share=True)

 from PyPDF2 import PdfReader
 import re
 import textwrap
+import soundfile as SF
+import numpy as np
 # Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                                      value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
             run_button = gr.Button("Generate Audio", variant="primary")
         with gr.Column():
+            audio_container = gr.Column()
             markdown_output = gr.Markdown()
     def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
             text = translate(text, source_lang, target_lang)
         sentences = split_text_into_sentences(text)
+        all_audio_data = []
         all_text = ""
         for sentence in sentences:
             sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
+            audio_data = (sample_rate, audio_arr)
+            all_audio_data.append(audio_data)
             all_text += f"**Sentence**: {sentence}\n\n"
+            yield all_audio_data, all_text
     def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
+        audio_container.clear_components()  # Clear previous components
+        for audio_data_list, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
+            for sample_rate, audio_arr in audio_data_list:
+                audio_container.append(gr.Audio(value=(np.array(audio_arr).astype(np.float32), sample_rate)))
+            yield None, markdown_text
+    run_button.click(run_pipeline, inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description], outputs=[audio_container, markdown_output])
 demo.queue()
 demo.launch(share=True)