Update app.py
Browse files
app.py
CHANGED
@@ -2,10 +2,26 @@ import torch
|
|
2 |
from peft import PeftModel
|
3 |
import transformers
|
4 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
import whisper
|
7 |
model1 = whisper.load_model("small")
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
assert (
|
10 |
"LlamaTokenizer" in transformers._import_structure["models.llama"]
|
11 |
), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
|
@@ -135,7 +151,22 @@ def evaluate(
|
|
135 |
)
|
136 |
s = generation_output.sequences[0]
|
137 |
output = tokenizer.decode(s)
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
|
141 |
g = gr.Interface(
|
@@ -154,8 +185,13 @@ g = gr.Interface(
|
|
154 |
outputs=[
|
155 |
gr.inputs.Textbox(
|
156 |
lines=5,
|
157 |
-
label="
|
158 |
-
)
|
|
|
|
|
|
|
|
|
|
|
159 |
],
|
160 |
title="🦙🌲 Alpaca-LoRA",
|
161 |
description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",
|
|
|
2 |
from peft import PeftModel
|
3 |
import transformers
|
4 |
import gradio as gr
|
5 |
+
import os
|
6 |
+
os.system('pip install voicefixer --upgrade')
|
7 |
+
from voicefixer import VoiceFixer
|
8 |
+
voicefixer = VoiceFixer()
|
9 |
+
|
10 |
+
from TTS.api import TTS
|
11 |
+
tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
|
12 |
|
13 |
import whisper
|
14 |
model1 = whisper.load_model("small")
|
15 |
|
16 |
+
import torchaudio
|
17 |
+
from speechbrain.pretrained import SpectralMaskEnhancement
|
18 |
+
|
19 |
+
enhance_model = SpectralMaskEnhancement.from_hparams(
|
20 |
+
source="speechbrain/metricgan-plus-voicebank",
|
21 |
+
savedir="pretrained_models/metricgan-plus-voicebank",
|
22 |
+
run_opts={"device":"cuda"},
|
23 |
+
)
|
24 |
+
|
25 |
assert (
|
26 |
"LlamaTokenizer" in transformers._import_structure["models.llama"]
|
27 |
), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
|
|
|
151 |
)
|
152 |
s = generation_output.sequences[0]
|
153 |
output = tokenizer.decode(s)
|
154 |
+
|
155 |
+
tts.tts_to_file(output.split("### Response:")[1].strip(), speaker_wav = upload, language="en", file_path="output.wav")
|
156 |
+
|
157 |
+
voicefixer.restore(input="output.wav", # input wav file path
|
158 |
+
output="audio1.wav", # output wav file path
|
159 |
+
cuda=True, # whether to use gpu acceleration
|
160 |
+
mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
|
161 |
+
|
162 |
+
noisy = enhance_model.load_audio(
|
163 |
+
"audio1.wav"
|
164 |
+
).unsqueeze(0)
|
165 |
+
|
166 |
+
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
|
167 |
+
torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
|
168 |
+
|
169 |
+
return [result.text, output.split("### Response:")[1].strip(), "enhanced.wav"]
|
170 |
|
171 |
|
172 |
g = gr.Interface(
|
|
|
185 |
outputs=[
|
186 |
gr.inputs.Textbox(
|
187 |
lines=5,
|
188 |
+
label="Speech to Text",
|
189 |
+
),
|
190 |
+
gr.inputs.Textbox(
|
191 |
+
lines=5,
|
192 |
+
label="Alpaca Output",
|
193 |
+
),
|
194 |
+
gr.Audio(label="Audio with Custom Voice"),
|
195 |
],
|
196 |
title="🦙🌲 Alpaca-LoRA",
|
197 |
description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",
|