huseinzol05
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -9,4 +9,41 @@ Finetuned https://huggingface.co/parler-tts/parler-tts-mini-v1 on Malay TTS data
|
|
9 |
|
10 |
Source code at https://github.com/mesolitica/malaya-speech/tree/master/session/parler-tts
|
11 |
|
12 |
-
Wandb at https://wandb.ai/huseinzol05/parler-speech?nw=nwuserhuseinzol05
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
Source code at https://github.com/mesolitica/malaya-speech/tree/master/session/parler-tts
|
11 |
|
12 |
+
Wandb at https://wandb.ai/huseinzol05/parler-speech?nw=nwuserhuseinzol05
|
13 |
+
|
14 |
+
## How to use
|
15 |
+
|
16 |
+
```python
|
17 |
+
import torch
|
18 |
+
from parler_tts import ParlerTTSForConditionalGeneration
|
19 |
+
from transformers import AutoTokenizer
|
20 |
+
import soundfile as sf
|
21 |
+
|
22 |
+
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
23 |
+
|
24 |
+
model = ParlerTTSForConditionalGeneration.from_pretrained("mesolitica/malay-parler-tts-mini-v1").to(device)
|
25 |
+
tokenizer = AutoTokenizer.from_pretrained("mesolitica/malay-parler-tts-mini-v1")
|
26 |
+
|
27 |
+
speakers = [
|
28 |
+
'Yasmin',
|
29 |
+
'Osman',
|
30 |
+
'Bunga',
|
31 |
+
'Ariff',
|
32 |
+
'Ayu',
|
33 |
+
'Kamarul',
|
34 |
+
'Danial',
|
35 |
+
'Elina',
|
36 |
+
]
|
37 |
+
|
38 |
+
prompt = 'Husein zolkepli sangat comel dan kacak suka makan cendol'
|
39 |
+
|
40 |
+
for s in speakers:
|
41 |
+
description = f"{s}'s voice, delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
|
42 |
+
|
43 |
+
input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
|
44 |
+
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
|
45 |
+
|
46 |
+
generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
|
47 |
+
audio_arr = generation.cpu()
|
48 |
+
sf.write(f'{s}.mp3', audio_arr.numpy().squeeze(), 44100)
|
49 |
+
```
|