emirhanbilgic
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -15,8 +15,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
15 |
# Initialize models and tokenizers
|
16 |
tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1").to(device)
|
17 |
tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
|
18 |
-
|
19 |
-
SAMPLE_RATE = feature_extractor.sampling_rate
|
20 |
SEED = 42
|
21 |
|
22 |
# Helper function to extract text from a PDF
|
@@ -95,23 +94,35 @@ with gr.Blocks() as demo:
|
|
95 |
|
96 |
def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
97 |
# Extract and process text from PDF
|
|
|
98 |
text = pdf_to_text(pdf_input.name)
|
|
|
99 |
|
100 |
# Perform translation if enabled
|
101 |
if translate_checkbox:
|
|
|
102 |
text = translate(text, source_lang, target_lang)
|
|
|
103 |
|
104 |
sentences = split_text_into_sentences(text)
|
|
|
|
|
105 |
|
106 |
for sentence in sentences:
|
107 |
-
#
|
108 |
sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
112 |
# Stream outputs to Gradio interface
|
113 |
for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
114 |
-
yield audio_data, markdown_text
|
115 |
|
116 |
def handle_translation_toggle(translate_checkbox):
|
117 |
if translate_checkbox:
|
|
|
15 |
# Initialize models and tokenizers
|
16 |
tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1").to(device)
|
17 |
tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
|
18 |
+
SAMPLE_RATE = 22050 # Adjust as needed
|
|
|
19 |
SEED = 42
|
20 |
|
21 |
# Helper function to extract text from a PDF
|
|
|
94 |
|
95 |
def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
96 |
# Extract and process text from PDF
|
97 |
+
print("Extracting text from PDF...")
|
98 |
text = pdf_to_text(pdf_input.name)
|
99 |
+
print(f"Extracted text: {text[:100]}...") # Display the first 100 characters for a quick preview
|
100 |
|
101 |
# Perform translation if enabled
|
102 |
if translate_checkbox:
|
103 |
+
print("Translating text...")
|
104 |
text = translate(text, source_lang, target_lang)
|
105 |
+
print(f"Translated text: {text[:100]}...") # Display the first 100 characters for a quick preview
|
106 |
|
107 |
sentences = split_text_into_sentences(text)
|
108 |
+
all_audio = []
|
109 |
+
all_text = ""
|
110 |
|
111 |
for sentence in sentences:
|
112 |
+
print(f"Processing sentence: {sentence[:50]}...") # Display the first 50 characters for a quick preview
|
113 |
sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
|
114 |
+
all_audio.append((sample_rate, audio_arr))
|
115 |
+
all_text += f"**Sentence**: {sentence}\n\n"
|
116 |
+
|
117 |
+
# Yield the accumulated results
|
118 |
+
yield all_audio, all_text
|
119 |
+
|
120 |
+
print("Processing complete.")
|
121 |
|
122 |
def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
123 |
# Stream outputs to Gradio interface
|
124 |
for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
125 |
+
yield audio_data[-1], markdown_text
|
126 |
|
127 |
def handle_translation_toggle(translate_checkbox):
|
128 |
if translate_checkbox:
|