emirhanbilgic committed on
Commit
0a52a3b
·
verified ·
1 Parent(s): c713231

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -9
app.py CHANGED
@@ -34,7 +34,29 @@ def split_text_into_sentences(text):
34
  sentences = sentence_endings.split(text)
35
  return [sentence.strip() for sentence in sentences if sentence.strip()]
36
 
37
- # Helper function to preprocess the text (normalization, punctuation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def preprocess(text):
39
  text = text.replace("-", " ")
40
  if text[-1] not in ".!?":
@@ -53,8 +75,6 @@ def generate_single_wav_from_text(sentence, description):
53
  prompt_attention_mask=prompt.attention_mask, do_sample=True, temperature=1.0
54
  )
55
  audio_arr = generation.cpu().numpy().squeeze()
56
- output_file = f"sentence.wav"
57
- sf.write(output_file, audio_arr, SAMPLE_RATE)
58
  return SAMPLE_RATE, audio_arr
59
 
60
  # Gradio Interface
@@ -62,28 +82,45 @@ with gr.Blocks() as demo:
62
  with gr.Row():
63
  with gr.Column():
64
  pdf_input = gr.File(label="Upload PDF", file_types=['pdf'])
 
 
 
65
  description = gr.Textbox(label="Voice Description", lines=2,
66
  value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
67
  run_button = gr.Button("Generate Audio", variant="primary")
68
  with gr.Column():
69
  audio_output = gr.Audio(label="Generated Audio")
 
70
 
71
- def handle_process(pdf_input, description):
72
  # Extract and process text from PDF
73
  text = pdf_to_text(pdf_input.name)
 
 
 
 
 
74
  sentences = split_text_into_sentences(text)
75
 
76
  for sentence in sentences:
77
  # Generate audio for each sentence
78
  sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
79
- yield gr.Audio.update(value=(sample_rate, audio_arr)), f"**Sentence**: {sentence}"
80
 
81
- def run_pipeline(pdf_input, description):
82
  # Stream outputs to Gradio interface
83
- for audio_component, markdown_component in handle_process(pdf_input, description):
84
- yield audio_component, gr.Markdown(markdown_component)
 
 
 
 
 
 
85
 
86
- run_button.click(run_pipeline, inputs=[pdf_input, description], outputs=[audio_output])
 
 
87
 
88
  demo.queue()
89
  demo.launch(share=True)
 
34
  sentences = sentence_endings.split(text)
35
  return [sentence.strip() for sentence in sentences if sentence.strip()]
36
 
37
+ # Translation function
38
@spaces.GPU(duration=120)  # NOTE(review): presumably requests a GPU slot on HF Spaces for up to 120 s; confirm
def translate(source_text, source_lang, target_lang, batch_size=16):
    """Translate ``source_text`` with a Helsinki-NLP OPUS-MT (Marian) model.

    The checkpoint name is built as
    ``Helsinki-NLP/opus-mt-{source_lang}-{target_lang}``, so the language pair
    must correspond to a published checkpoint.

    Args:
        source_text: Text to translate.
        source_lang: Source language code used in the checkpoint name.
        target_lang: Target language code used in the checkpoint name.
        batch_size: Number of text chunks sent to the model per generate call.

    Returns:
        The translated chunks joined by single spaces; an empty string when
        ``source_text`` is empty or only whitespace.
    """
    # Nothing to translate: skip the (expensive) model load entirely.
    if not source_text or not source_text.strip():
        return ""

    model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"

    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name).to(device)

    # Wrap into chunks of at most 512 characters so each one stays within the
    # tokenizer's max_length after tokenization (longer ones are truncated).
    text_chunks = textwrap.wrap(source_text, 512)
    translated_chunks = []

    for start in range(0, len(text_chunks), batch_size):
        text_batch = text_chunks[start:start + batch_size]
        encoded = tokenizer(
            text_batch,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        ).to(device)
        # The batch is padded, so pass the attention mask explicitly instead
        # of relying on generate() to infer it from the pad token id.
        output_ids = model.generate(
            input_ids=encoded.input_ids,
            attention_mask=encoded.attention_mask,
            max_new_tokens=512,
        )
        translated_chunks.extend(
            tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        )

    # join() avoids quadratic string building and the stray trailing space.
    return " ".join(translated_chunks)
58
+
59
+ # Function to preprocess the text (normalization, punctuation)
60
  def preprocess(text):
61
  text = text.replace("-", " ")
62
  if text[-1] not in ".!?":
 
75
  prompt_attention_mask=prompt.attention_mask, do_sample=True, temperature=1.0
76
  )
77
  audio_arr = generation.cpu().numpy().squeeze()
 
 
78
  return SAMPLE_RATE, audio_arr
79
 
80
  # Gradio Interface
 
82
  with gr.Row():
83
  with gr.Column():
84
  pdf_input = gr.File(label="Upload PDF", file_types=['pdf'])
85
# Translation is off by default, so the language pickers start hidden; the
# checkbox's change handler reveals them when translation is enabled.
translate_checkbox = gr.Checkbox(label="Enable Translation", value=False)
source_lang = gr.Dropdown(choices=["en", "tr", "de", "fr"], label="Source Language", value="en", interactive=True, visible=False)
# Initial target choices match the targets offered for the default source "en".
target_lang = gr.Dropdown(choices=["de", "fr", "tr"], label="Target Language", value="tr", interactive=True, visible=False)
88
  description = gr.Textbox(label="Voice Description", lines=2,
89
  value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
90
  run_button = gr.Button("Generate Audio", variant="primary")
91
  with gr.Column():
92
  audio_output = gr.Audio(label="Generated Audio")
93
+ markdown_output = gr.Markdown()
94
 
95
def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
    """Turn an uploaded PDF into a stream of per-sentence audio clips.

    Args:
        pdf_input: Uploaded file object from the PDF input; its ``name``
            attribute is passed to ``pdf_to_text``.
        translate_checkbox: When truthy, the extracted text is translated
            before being split into sentences.
        source_lang: Source language code passed to ``translate``.
        target_lang: Target language code passed to ``translate``.
        description: Voice description passed to the speech generator.

    Yields:
        ``((sample_rate, audio_arr), markdown_text)`` for every sentence, in
        document order.

    Raises:
        gr.Error: If no PDF was uploaded.
    """
    # Without this guard a click with no upload fails on ``None.name``.
    if pdf_input is None:
        raise gr.Error("Please upload a PDF file before generating audio.")

    # Extract and process text from PDF
    text = pdf_to_text(pdf_input.name)

    # Perform translation if enabled
    if translate_checkbox:
        text = translate(text, source_lang, target_lang)

    sentences = split_text_into_sentences(text)

    for sentence in sentences:
        # Generate audio for each sentence
        sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
        yield (sample_rate, audio_arr), f"**Sentence**: {sentence}"
109
 
110
def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
    """Relay each (audio, markdown) pair produced by ``handle_process``.

    Takes the same arguments as ``handle_process`` and forwards them
    unchanged; every result is re-yielded as a two-item tuple.
    """
    results = handle_process(
        pdf_input, translate_checkbox, source_lang, target_lang, description
    )
    for item in results:
        # Unpack first so each yielded value is exactly an (audio, text) pair.
        audio_data, markdown_text = item
        yield audio_data, markdown_text
114
+
115
def handle_translation_toggle(translate_checkbox):
    """Return visibility updates for the two language dropdowns.

    Both updates set ``visible`` to True when ``translate_checkbox`` is
    truthy and to False otherwise.
    """
    show = bool(translate_checkbox)
    # One separate update object per output component.
    return gr.update(visible=show), gr.update(visible=show)
120
 
121
# Translation directions offered in the UI: source language -> target languages.
TARGETS_BY_SOURCE = {"en": ["de", "fr", "tr"], "tr": ["en"], "de": ["en", "fr"], "fr": ["en", "de"]}


def update_target_choices(lang, current_target):
    """Refresh the target-language dropdown after the source language changes.

    Keeps the current target when it is still valid for the new source;
    otherwise falls back to the first available target (or clears the value
    when the source has no targets). Updating only ``choices`` would leave a
    stale selection such as source ``tr`` with target ``tr``.
    """
    choices = TARGETS_BY_SOURCE.get(lang, [])
    if current_target in choices:
        value = current_target
    else:
        value = choices[0] if choices else None
    return gr.update(choices=choices, value=value)


translate_checkbox.change(fn=handle_translation_toggle, inputs=translate_checkbox, outputs=[source_lang, target_lang])
source_lang.change(fn=update_target_choices, inputs=[source_lang, target_lang], outputs=target_lang)
run_button.click(run_pipeline, inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description], outputs=[audio_output, markdown_output])

demo.queue()
demo.launch(share=True)