Update app.py
Browse files
app.py
CHANGED
@@ -34,7 +34,29 @@ def split_text_into_sentences(text):
|
|
34 |
sentences = sentence_endings.split(text)
|
35 |
return [sentence.strip() for sentence in sentences if sentence.strip()]
|
36 |
|
37 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
def preprocess(text):
|
39 |
text = text.replace("-", " ")
|
40 |
if text[-1] not in ".!?":
|
@@ -53,8 +75,6 @@ def generate_single_wav_from_text(sentence, description):
|
|
53 |
prompt_attention_mask=prompt.attention_mask, do_sample=True, temperature=1.0
|
54 |
)
|
55 |
audio_arr = generation.cpu().numpy().squeeze()
|
56 |
-
output_file = f"sentence.wav"
|
57 |
-
sf.write(output_file, audio_arr, SAMPLE_RATE)
|
58 |
return SAMPLE_RATE, audio_arr
|
59 |
|
60 |
# Gradio Interface
|
@@ -62,28 +82,45 @@ with gr.Blocks() as demo:
|
|
62 |
with gr.Row():
|
63 |
with gr.Column():
|
64 |
pdf_input = gr.File(label="Upload PDF", file_types=['pdf'])
|
|
|
|
|
|
|
65 |
description = gr.Textbox(label="Voice Description", lines=2,
|
66 |
value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
|
67 |
run_button = gr.Button("Generate Audio", variant="primary")
|
68 |
with gr.Column():
|
69 |
audio_output = gr.Audio(label="Generated Audio")
|
|
|
70 |
|
71 |
-
def handle_process(pdf_input, description):
|
72 |
# Extract and process text from PDF
|
73 |
text = pdf_to_text(pdf_input.name)
|
|
|
|
|
|
|
|
|
|
|
74 |
sentences = split_text_into_sentences(text)
|
75 |
|
76 |
for sentence in sentences:
|
77 |
# Generate audio for each sentence
|
78 |
sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
|
79 |
-
yield
|
80 |
|
81 |
-
def run_pipeline(pdf_input, description):
|
82 |
# Stream outputs to Gradio interface
|
83 |
-
for
|
84 |
-
yield
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
-
|
|
|
|
|
87 |
|
88 |
demo.queue()
|
89 |
demo.launch(share=True)
|
|
|
34 |
sentences = sentence_endings.split(text)
|
35 |
return [sentence.strip() for sentence in sentences if sentence.strip()]
|
36 |
|
37 |
+
# Translation function
|
38 |
+
@spaces.GPU(duration=120)
def translate(source_text, source_lang, target_lang, batch_size=16):
    """Translate ``source_text`` with a Helsinki-NLP OPUS-MT Marian model.

    The text is wrapped into chunks of at most 512 characters, the chunks are
    translated in batches of ``batch_size``, and the decoded chunks are joined
    with single spaces.

    Args:
        source_text: Text to translate.
        source_lang: Source language code; inserted into the model name.
        target_lang: Target language code; inserted into the model name.
        batch_size: Number of text chunks sent to ``model.generate`` per call.

    Returns:
        The translated text. An empty string is returned when ``source_text``
        is empty or whitespace-only.
    """
    # textwrap.wrap yields no chunks for empty/whitespace-only text; return
    # before loading the tokenizer and model in that case.
    text_chunks = textwrap.wrap(source_text, 512)
    if not text_chunks:
        return ""

    model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name).to(device)

    translated_chunks = []
    for start in range(0, len(text_chunks), batch_size):
        text_batch = text_chunks[start:start + batch_size]
        encoded = tokenizer(
            text_batch,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        ).to(device)
        # Batches are padded, so forward the attention mask together with the
        # input ids instead of passing input_ids alone.
        output_ids = model.generate(**encoded, max_new_tokens=512)
        translated_chunks.extend(
            tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        )

    return " ".join(translated_chunks)
|
58 |
+
|
59 |
+
# Function to preprocess the text (normalization, punctuation)
|
60 |
def preprocess(text):
|
61 |
text = text.replace("-", " ")
|
62 |
if text[-1] not in ".!?":
|
|
|
75 |
prompt_attention_mask=prompt.attention_mask, do_sample=True, temperature=1.0
|
76 |
)
|
77 |
audio_arr = generation.cpu().numpy().squeeze()
|
|
|
|
|
78 |
return SAMPLE_RATE, audio_arr
|
79 |
|
80 |
# Gradio Interface
|
|
|
82 |
with gr.Row():
|
83 |
with gr.Column():
|
84 |
pdf_input = gr.File(label="Upload PDF", file_types=['pdf'])
|
85 |
+
translate_checkbox = gr.Checkbox(label="Enable Translation", value=False)
|
86 |
+
source_lang = gr.Dropdown(choices=["en", "tr", "de", "fr"], label="Source Language", value="en", interactive=True)
|
87 |
+
target_lang = gr.Dropdown(choices=["tr"], label="Target Language", value="tr", interactive=True)
|
88 |
description = gr.Textbox(label="Voice Description", lines=2,
|
89 |
value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
|
90 |
run_button = gr.Button("Generate Audio", variant="primary")
|
91 |
with gr.Column():
|
92 |
audio_output = gr.Audio(label="Generated Audio")
|
93 |
+
markdown_output = gr.Markdown()
|
94 |
|
95 |
+
def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
    """Yield audio and a Markdown caption for every sentence of a PDF.

    Args:
        pdf_input: Value of the "Upload PDF" file component.
        translate_checkbox: When truthy, the extracted text is translated
            before it is split into sentences.
        source_lang: Source language code forwarded to ``translate``.
        target_lang: Target language code forwarded to ``translate``.
        description: Voice description forwarded to
            ``generate_single_wav_from_text``.

    Yields:
        ``((sample_rate, audio_arr), markdown_text)`` for each sentence, in
        document order.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if pdf_input is None:
        # Without this guard the attribute access below fails with an
        # AttributeError that tells the user nothing.
        raise gr.Error("Please upload a PDF file before generating audio.")

    # NOTE(review): the file component may pass either an object exposing
    # ``.name`` or a plain path string depending on the Gradio version and
    # component settings -- accept both; confirm against the pinned version.
    pdf_path = getattr(pdf_input, "name", pdf_input)

    # Extract and process text from PDF
    text = pdf_to_text(pdf_path)

    # Perform translation if enabled
    if translate_checkbox:
        text = translate(text, source_lang, target_lang)

    for sentence in split_text_into_sentences(text):
        # Generate audio for each sentence
        sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
        yield (sample_rate, audio_arr), f"**Sentence**: {sentence}"
|
109 |
|
110 |
+
def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
    """Stream the ``(audio, markdown)`` pairs from ``handle_process`` to the UI.

    All arguments are forwarded unchanged to ``handle_process``; each pair it
    produces is re-yielded in the same order.
    """
    # Stream outputs to Gradio interface
    yield from handle_process(
        pdf_input, translate_checkbox, source_lang, target_lang, description
    )
|
114 |
+
|
115 |
+
def handle_translation_toggle(translate_checkbox):
    """Return visibility updates for the two language dropdowns.

    Both updates carry ``visible=True`` when ``translate_checkbox`` is truthy
    and ``visible=False`` otherwise.
    """
    show = bool(translate_checkbox)
    return gr.update(visible=show), gr.update(visible=show)
|
120 |
|
121 |
+
# Target languages offered for each source language.
TARGET_LANGS_BY_SOURCE = {
    "en": ["de", "fr", "tr"],
    "tr": ["en"],
    "de": ["en", "fr"],
    "fr": ["en", "de"],
}


def _update_target_choices(lang):
    """Return a dropdown update with the targets valid for source ``lang``.

    The selected value is reset to the first valid target (or cleared when
    there is none) so a stale selection such as source == target cannot be
    sent on to ``translate``.
    """
    choices = TARGET_LANGS_BY_SOURCE.get(lang, [])
    return gr.update(choices=choices, value=choices[0] if choices else None)


# Show or hide the language dropdowns together with the translation checkbox.
translate_checkbox.change(fn=handle_translation_toggle, inputs=translate_checkbox, outputs=[source_lang, target_lang])
# Keep the target dropdown consistent with the chosen source language.
source_lang.change(fn=_update_target_choices, inputs=source_lang, outputs=target_lang)
# Stream generated audio and the matching sentence caption on click.
run_button.click(run_pipeline, inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description], outputs=[audio_output, markdown_output])
|
124 |
|
125 |
demo.queue()
|
126 |
demo.launch(share=True)
|