updated libs for easier installation, cannot upload audio, recorded audio doesn't work

Files changed (14) hide show

README.md CHANGED Viewed

@@ -11,3 +11,8 @@ license: cc-by-nc-sa-4.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+```bash
+conda create -n voicecraft_gradio python=3.10.13
+conda activate voicecraft_gradio
+pip install -r requirements.txt
+```

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
-# os.environ["CUDA_VISIBLE_DEVICES"] = "5" # these are only used if developping locally
 import gradio as gr
 import torch
 import torchaudio
@@ -381,7 +381,7 @@ with gr.Blocks() as app:
     with gr.Row():
         with gr.Column(scale=2):
-            input_audio = gr.Audio(value="./demo/84_121550_000074_000000.wav", label="Input Audio", type="filepath")
             with gr.Group():
                 original_transcript = gr.Textbox(label="Original transcript", lines=5, value=demo_original_transcript, interactive=False,
                                                  info="Use whisper model to get the transcript. Fix it if necessary.")
@@ -469,7 +469,7 @@ with gr.Blocks() as app:
                           inputs=[whisper_model_choice, voicecraft_model_choice],
                           outputs=[models_selector])
-    input_audio.upload(fn=update_input_audio,
                        inputs=[input_audio],
                        outputs=[prompt_end_time, edit_start_time, edit_end_time])
     transcribe_btn.click(fn=transcribe,

 import os
 # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+# os.environ["CUDA_VISIBLE_DEVICES"] = "1" # these are only used if developing locally
 import gradio as gr
 import torch
 import torchaudio
     with gr.Row():
         with gr.Column(scale=2):
+            input_audio = gr.Audio(sources=["upload", "microphone"], value="./demo/84_121550_000074_000000.wav", label="Input Audio", type="filepath", interactive=True)
             with gr.Group():
                 original_transcript = gr.Textbox(label="Original transcript", lines=5, value=demo_original_transcript, interactive=False,
                                                  info="Use whisper model to get the transcript. Fix it if necessary.")
                           inputs=[whisper_model_choice, voicecraft_model_choice],
                           outputs=[models_selector])
+    input_audio.change(fn=update_input_audio,
                        inputs=[input_audio],
                        outputs=[prompt_end_time, edit_start_time, edit_end_time])
     transcribe_btn.click(fn=transcribe,

data/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/data/__pycache__/__init__.cpython-310.pyc and b/data/__pycache__/__init__.cpython-310.pyc differ

data/__pycache__/tokenizer.cpython-310.pyc CHANGED Viewed

Binary files a/data/__pycache__/tokenizer.cpython-310.pyc and b/data/__pycache__/tokenizer.cpython-310.pyc differ

models/__pycache__/codebooks_patterns.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/codebooks_patterns.cpython-310.pyc and b/models/__pycache__/codebooks_patterns.cpython-310.pyc differ

models/__pycache__/voicecraft.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/voicecraft.cpython-310.pyc and b/models/__pycache__/voicecraft.cpython-310.pyc differ

models/modules/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/models/modules/__pycache__/__init__.cpython-310.pyc and b/models/modules/__pycache__/__init__.cpython-310.pyc differ

models/modules/__pycache__/activation.cpython-310.pyc CHANGED Viewed

Binary files a/models/modules/__pycache__/activation.cpython-310.pyc and b/models/modules/__pycache__/activation.cpython-310.pyc differ

models/modules/__pycache__/embedding.cpython-310.pyc CHANGED Viewed

Binary files a/models/modules/__pycache__/embedding.cpython-310.pyc and b/models/modules/__pycache__/embedding.cpython-310.pyc differ

models/modules/__pycache__/scaling.cpython-310.pyc CHANGED Viewed

Binary files a/models/modules/__pycache__/scaling.cpython-310.pyc and b/models/modules/__pycache__/scaling.cpython-310.pyc differ

models/modules/__pycache__/transformer.cpython-310.pyc CHANGED Viewed

Binary files a/models/modules/__pycache__/transformer.cpython-310.pyc and b/models/modules/__pycache__/transformer.cpython-310.pyc differ

models/modules/__pycache__/utils.cpython-310.pyc CHANGED Viewed

Binary files a/models/modules/__pycache__/utils.cpython-310.pyc and b/models/modules/__pycache__/utils.cpython-310.pyc differ

pretrained_models/giga830M.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:2454b51575822a04d24a00f8ba78f201f916439ffa62a3c1ac0ffa5220f429e3
+size 3358342977

requirements.txt CHANGED Viewed

@@ -1,9 +1,6 @@
--e git+https://github.com/facebookresearch/audiocraft.git@c5157b5bf14bf83449c17ea1eeb66c19fb4bc7f0#egg=audiocraft
-xformers==0.0.22
-torchaudio==2.0.2
-torch==2.0.1
 phonemizer==3.2.1
-gradio==3.50.2
 nltk>=3.8.1
 openai-whisper>=20231117
 spaces

+-e git+https://github.com/facebookresearch/audiocraft.git@f83babff6b5e97f75562127c4cc8122229c8f099#egg=audiocraft
 phonemizer==3.2.1
+gradio
 nltk>=3.8.1
 openai-whisper>=20231117
 spaces