vihangp committed on
Commit
ff3710a
·
verified ·
1 Parent(s): 5eb978f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -22,14 +22,14 @@ models_per_language = {
22
  "marathi": ["ylacombe/mms-mar-finetuned-monospeaker"]
23
  }
24
 
25
- HUB_PATH = "ylacombe/vits_ljs_midlands_male_monospeaker"
26
 
27
 
28
  pipe_dict = {
29
- "current_model": "ylacombe/vits_ljs_midlands_male_monospeaker",
30
  "pipe": pipeline("text-to-speech", model=HUB_PATH, device=0),
31
  "original_pipe": pipeline("text-to-speech", model=default_model_per_language["marathi"], device=0),
32
- "language": "english",
33
  }
34
 
35
  title = """
@@ -58,7 +58,7 @@ def generate_audio(text, model_id, language):
58
  out = []
59
  # first generate original model result
60
  output = pipe_dict["original_pipe"](text)
61
- output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=False, label=f"Non finetuned model prediction {default_model_per_language[language]}", show_label=True,
62
  visible=True)
63
  out.append(output)
64
 
@@ -74,7 +74,7 @@ def generate_audio(text, model_id, language):
74
  out.extend([gr.Audio(visible=False)]*(max_speakers-num_speakers))
75
  else:
76
  output = pipe_dict["pipe"](text)
77
- output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=False, label="Generated Audio - Mono speaker", show_label=True,
78
  visible=True)
79
  out.append(output)
80
  out.extend([gr.Audio(visible=False)]*(max_speakers-2))
 
22
  "marathi": ["ylacombe/mms-mar-finetuned-monospeaker"]
23
  }
24
 
25
+ HUB_PATH = "ylacombe/mms-mar-finetuned-monospeaker"
26
 
27
 
28
  pipe_dict = {
29
+ "current_model": "ylacombe/mms-mar-finetuned-monospeaker",
30
  "pipe": pipeline("text-to-speech", model=HUB_PATH, device=0),
31
  "original_pipe": pipeline("text-to-speech", model=default_model_per_language["marathi"], device=0),
32
+ "language": "marathi",
33
  }
34
 
35
  title = """
 
58
  out = []
59
  # first generate original model result
60
  output = pipe_dict["original_pipe"](text)
61
+ output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label=f"Non finetuned model prediction {default_model_per_language[language]}", show_label=True,
62
  visible=True)
63
  out.append(output)
64
 
 
74
  out.extend([gr.Audio(visible=False)]*(max_speakers-num_speakers))
75
  else:
76
  output = pipe_dict["pipe"](text)
77
+ output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label="Generated Audio - Mono speaker", show_label=True,
78
  visible=True)
79
  out.append(output)
80
  out.extend([gr.Audio(visible=False)]*(max_speakers-2))