emirhanbilgic
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -15,8 +15,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
15 |
# Initialize models and tokenizers
|
16 |
tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1").to(device)
|
17 |
tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
|
18 |
-
|
19 |
-
SAMPLE_RATE = feature_extractor.sampling_rate
|
20 |
SEED = 42
|
21 |
|
22 |
# Helper function to extract text from a PDF
|
@@ -95,23 +94,35 @@ with gr.Blocks() as demo:
|
|
95 |
|
96 |
def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
97 |
# Extract and process text from PDF
|
|
|
98 |
text = pdf_to_text(pdf_input.name)
|
|
|
99 |
|
100 |
# Perform translation if enabled
|
101 |
if translate_checkbox:
|
|
|
102 |
text = translate(text, source_lang, target_lang)
|
|
|
103 |
|
104 |
sentences = split_text_into_sentences(text)
|
|
|
|
|
105 |
|
106 |
for sentence in sentences:
|
107 |
-
#
|
108 |
sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
112 |
# Stream outputs to Gradio interface
|
113 |
for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
114 |
-
yield audio_data, markdown_text
|
115 |
|
116 |
def handle_translation_toggle(translate_checkbox):
|
117 |
if translate_checkbox:
|
|
|
15 |
# Initialize models and tokenizers
|
16 |
tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1").to(device)
|
17 |
tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
|
18 |
+
SAMPLE_RATE = 22050 # Adjust as needed
|
|
|
19 |
SEED = 42
|
20 |
|
21 |
# Helper function to extract text from a PDF
|
|
|
94 |
|
95 |
def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
96 |
# Extract and process text from PDF
|
97 |
+
print("Extracting text from PDF...")
|
98 |
text = pdf_to_text(pdf_input.name)
|
99 |
+
print(f"Extracted text: {text[:100]}...") # Display the first 100 characters for a quick preview
|
100 |
|
101 |
# Perform translation if enabled
|
102 |
if translate_checkbox:
|
103 |
+
print("Translating text...")
|
104 |
text = translate(text, source_lang, target_lang)
|
105 |
+
print(f"Translated text: {text[:100]}...") # Display the first 100 characters for a quick preview
|
106 |
|
107 |
sentences = split_text_into_sentences(text)
|
108 |
+
all_audio = []
|
109 |
+
all_text = ""
|
110 |
|
111 |
for sentence in sentences:
|
112 |
+
print(f"Processing sentence: {sentence[:50]}...") # Display the first 50 characters for a quick preview
|
113 |
sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
|
114 |
+
all_audio.append((sample_rate, audio_arr))
|
115 |
+
all_text += f"**Sentence**: {sentence}\n\n"
|
116 |
+
|
117 |
+
# Yield the accumulated results
|
118 |
+
yield all_audio, all_text
|
119 |
+
|
120 |
+
print("Processing complete.")
|
121 |
|
122 |
def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
123 |
# Stream outputs to Gradio interface
|
124 |
for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
|
125 |
+
yield audio_data[-1], markdown_text
|
126 |
|
127 |
def handle_translation_toggle(translate_checkbox):
|
128 |
if translate_checkbox:
|