Spaces:

fishaudio
/

fish-diffusion

Sleeping

File size: 3,820 Bytes

e69f277
 
b0ea01b
 
e69f277
 
 
c7e89ae
69efa3d
 
c7e89ae
c488270
c7e89ae
b0ea01b
d102810
c488270
60aad2d
e997c1f
c488270
 
 
0a4f982
 
c488270
0a4f982
 
a1f15d2
c488270
 
76d1ff4
 
c488270
76d1ff4
 
be2c1a9
c488270
 
a747fe2
 
c488270
a747fe2
 
c488270
 
 
ce95425
 
c488270
ce95425
 
c488270
 
 
01814cd
 
c488270
01814cd
 
c488270
 
 
35d626e
 
 
 
 
 
 
 
0ec5db0
 
 
 
 
 
 
 
a8c4a68

# Markdown introduction for the demo.
# NOTE(review): presumably rendered by the app UI — confirm against the consumer.
readme: |
  # Fish Diffusion - HiFiSinger Demo 🎤
  GitHub Repo: [fishaudio/fish-diffusion](https://github.com/fishaudio/fish-diffusion)
  To share a new model, please check out the [Share Your Model](https://huggingface.co/spaces/fishaudio/fish-diffusion/discussions/2) discussion.

# NOTE(review): looks like the cap on how many speakers may be blended at once — confirm in the app code.
max_mixing_speakers: 3

models:
  # Model entry: multi-speaker M4Singer pretrained model.
  - name: 'M4Singer Pretrained (Many Speakers, Alto, Tenor, Soprano, Bass)'
    config: configs/M4Singer.py
    checkpoint: checkpoints/M4Singer.ckpt
    # Markdown description; two datasets are named, hence the plural.
    readme: |
      This model is trained on the Opencpop and M4Singer datasets and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
      It contains more than 20 speakers and is thus a good playground for timbre mixing.
    default_speaker: 'opencpop'
    
  # Model entry: Tohoku Kiritan.
  - name: 'Tohoku Kiritan (Feminine)'
    config: configs/Kiritan.py
    checkpoint: checkpoints/Kiritan.ckpt
    # Markdown description; no trailing whitespace, since it would become part of the literal block value.
    readme: |
      This model is trained on the Tohoku Kiritan dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
      It has a cute, yet powerful voice.
    default_speaker: 'kiritan'

  # Model entry: Tohoku Itako.
  - name: 'Tohoku Itako (Feminine)'
    config: configs/Itako.py
    checkpoint: checkpoints/Itako.ckpt
    # Markdown description; no trailing whitespace, since it would become part of the literal block value.
    readme: |
      This model is trained on the Tohoku Itako dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
      It has a bright and whispery voice.
    default_speaker: 'itako'

  # Model entry: No.7.
  - name: 'No.7 (Feminine)'
    config: configs/Seven.py
    checkpoint: checkpoints/Seven.ckpt
    # Markdown description of the model and its license.
    readme: |
      This model is trained on the No.7 dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
      It has a strong and sharp voice.
    default_speaker: 'seven'

  # Model entry: Yoko.
  - name: 'Yoko (Feminine)'
    config: configs/Yoko.py
    checkpoint: checkpoints/Yoko.ckpt
    # Markdown description of the model and its license.
    readme: |
      This model is trained on the Sinsy-f00001 dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
      It has a powerful, tense, and relaxed voice.
    default_speaker: 'yoko'

  # Model entry: JSUT.
  - name: 'JSUT (Feminine)'
    config: configs/JSUT.py
    checkpoint: checkpoints/JSUT.ckpt
    # Markdown description of the model and its license.
    readme: |
      This model is trained on the JSUT-song dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
      It has a moist and transparent voice.
    default_speaker: 'jsut'

  # Model entry: CSD.
  - name: 'CSD (Feminine)'
    config: configs/CSD.py
    checkpoint: checkpoints/CSD.ckpt
    # Markdown description of the model and its license.
    readme: |
      This model is trained on the Children's Song Dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
      It has a sweet and tender voice.
    default_speaker: 'csd'

  # Model entry: Namine Ritsu.
  - name: 'Namine Ritsu (Feminine)'
    config: configs/Ritsu.py
    checkpoint: checkpoints/Ritsu.ckpt
    # Markdown description of the model and its license.
    readme: |
      This model is trained on the Namine Ritsu ENUNU Dataset and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
      It has a powerful and throaty voice.
    default_speaker: 'ritsu'

  # Model entry: S.
  - name: 'S (Masculine)'
    config: configs/S.py
    checkpoint: checkpoints/S.ckpt
    # Markdown description of the model and its license.
    readme: |
      This model is trained on a dataset known as S and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
      It has a nasally but powerful voice.
    default_speaker: 's'

  # Model entry: C.
  - name: 'C (Feminine)'
    config: configs/C.py
    checkpoint: checkpoints/C.ckpt
    # Markdown description of the model and its license.
    readme: |
      This model is trained on a dataset known as C and released under the [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
      It has a whispery, fluttery voice.
    default_speaker: 'c'