Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ def _grab_best_device(use_gpu=True):
|
|
15 |
device = _grab_best_device()
|
16 |
|
17 |
default_model_per_language = {
|
18 |
-
"marathi": "
|
19 |
}
|
20 |
|
21 |
models_per_language = {
|
@@ -58,26 +58,26 @@ def generate_audio(text, model_id, language):
|
|
58 |
out = []
|
59 |
# first generate original model result
|
60 |
output = pipe_dict["original_pipe"](text)
|
61 |
-
output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label=f"
|
62 |
visible=True)
|
63 |
out.append(output)
|
64 |
|
65 |
|
66 |
-
if num_speakers>1:
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
else:
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
return out
|
82 |
|
83 |
|
|
|
15 |
device = _grab_best_device()
|
16 |
|
17 |
default_model_per_language = {
|
18 |
+
"marathi": "ylacombe/mms-mar-finetuned-monospeaker"
|
19 |
}
|
20 |
|
21 |
models_per_language = {
|
|
|
58 |
out = []
|
59 |
# first generate original model result
|
60 |
output = pipe_dict["original_pipe"](text)
|
61 |
+
output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label=f"Finetuned model prediction {default_model_per_language[language]}", show_label=True,
|
62 |
visible=True)
|
63 |
out.append(output)
|
64 |
|
65 |
|
66 |
+
# if num_speakers>1:
|
67 |
+
# for i in range(min(num_speakers, max_speakers - 1)):
|
68 |
+
# forward_params = {"speaker_id": i}
|
69 |
+
# output = pipe_dict["pipe"](text, forward_params=forward_params)
|
70 |
|
71 |
+
# output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=False, label=f"Generated Audio - speaker {i}", show_label=True,
|
72 |
+
# visible=True)
|
73 |
+
# out.append(output)
|
74 |
+
# out.extend([gr.Audio(visible=False)]*(max_speakers-num_speakers))
|
75 |
+
# else:
|
76 |
+
# output = pipe_dict["pipe"](text)
|
77 |
+
# output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label="Generated Audio - Mono speaker", show_label=True,
|
78 |
+
# visible=True)
|
79 |
+
# out.append(output)
|
80 |
+
# out.extend([gr.Audio(visible=False)]*(max_speakers-2))
|
81 |
return out
|
82 |
|
83 |
|