ercaronte committed on
Commit
2fbbc0b
1 Parent(s): 9fe1c1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -21,10 +21,10 @@ en_fr_translator = pipeline("translation_en_to_fr")
21
 
22
 
23
  # load text-to-speech
24
- model = VitsModel.from_pretrained("facebook/mms-tts-fra")
25
  tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-fra")
26
 
27
- def synthesise(text):
28
  translation_to_french = en_fr_translator(text)
29
  french_text = translation_to_french[0]['translation_text']
30
 
@@ -32,25 +32,25 @@ def synthesise(text):
32
  input_ids = inputs["input_ids"]
33
 
34
  with torch.no_grad():
35
- outputs = model(input_ids)
36
 
37
  speech = outputs["waveform"]
38
  return speech
39
 
40
 
41
  # load text-to-speech checkpoint and speaker embeddings
42
- #processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
43
 
44
- #model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts").to(device)
45
- #vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
46
 
47
- #embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
48
- #speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
49
 
50
- #def synthesise(text):
51
- # inputs = processor(text=text, return_tensors="pt")
52
- # speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
53
- # return speech.cpu()
54
 
55
 
56
  def speech_to_speech_translation(audio):
 
21
 
22
 
23
  # load text-to-speech
24
+ model_new = VitsModel.from_pretrained("facebook/mms-tts-fra")
25
  tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-fra")
26
 
27
+ def synthesise_new(text):
28
  translation_to_french = en_fr_translator(text)
29
  french_text = translation_to_french[0]['translation_text']
30
 
 
32
  input_ids = inputs["input_ids"]
33
 
34
  with torch.no_grad():
35
+ outputs = model_new(input_ids)
36
 
37
  speech = outputs["waveform"]
38
  return speech
39
 
40
 
41
  # load text-to-speech checkpoint and speaker embeddings
42
+ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
43
 
44
+ model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts").to(device)
45
+ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
46
 
47
+ embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
48
+ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
49
 
50
+ def synthesise(text):
51
+ inputs = processor(text=text, return_tensors="pt")
52
+ speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
53
+ return speech.cpu()
54
 
55
 
56
  def speech_to_speech_translation(audio):