Spaces:
Running
Running
jason-on-salt-a40
committed on
Commit
·
b1f4e2f
1
Parent(s):
579d79b
fix space error. fix encodec download path
Browse files
app.py
CHANGED
@@ -63,7 +63,7 @@ class WhisperModel:
|
|
63 |
def transcribe(self, audio_path):
|
64 |
return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
|
65 |
|
66 |
-
|
67 |
class WhisperxModel:
|
68 |
def __init__(self, model_name, align_model: WhisperxAlignModel):
|
69 |
from whisperx import load_model
|
@@ -100,7 +100,7 @@ def load_models(whisper_backend_name, whisper_model_name, alignment_model_name,
|
|
100 |
|
101 |
encodec_fn = f"{MODELS_PATH}/encodec_4cb2048_giga.th"
|
102 |
if not os.path.exists(encodec_fn):
|
103 |
-
os.system(f"wget https://huggingface.co/pyp1/VoiceCraft/resolve/main/encodec_4cb2048_giga.th")
|
104 |
|
105 |
voicecraft_model = {
|
106 |
"config": config,
|
@@ -114,9 +114,11 @@ def load_models(whisper_backend_name, whisper_model_name, alignment_model_name,
|
|
114 |
|
115 |
def get_transcribe_state(segments):
|
116 |
words_info = [word_info for segment in segments for word_info in segment["words"]]
|
|
|
|
|
117 |
return {
|
118 |
"segments": segments,
|
119 |
-
"transcript":
|
120 |
"words_info": words_info,
|
121 |
"transcript_with_start_time": " ".join([f"{word['start']} {word['word']}" for word in words_info]),
|
122 |
"transcript_with_end_time": " ".join([f"{word['word']} {word['end']}" for word in words_info]),
|
@@ -140,7 +142,7 @@ def transcribe(seed, audio_path):
|
|
140 |
state
|
141 |
]
|
142 |
|
143 |
-
|
144 |
def align_segments(transcript, audio_path):
|
145 |
from aeneas.executetask import ExecuteTask
|
146 |
from aeneas.task import Task
|
@@ -363,7 +365,7 @@ If disabled, you should write the target transcript yourself:</br>
|
|
363 |
- In Edit mode write full prompt</br>
|
364 |
"""
|
365 |
|
366 |
-
demo_original_transcript = "
|
367 |
|
368 |
demo_text = {
|
369 |
"TTS": {
|
@@ -603,6 +605,7 @@ if __name__ == "__main__":
|
|
603 |
parser.add_argument("--models-path", default="./pretrained_models", help="Path to voicecraft models directory")
|
604 |
parser.add_argument("--port", default=7860, type=int, help="App port")
|
605 |
parser.add_argument("--share", action="store_true", help="Launch with public url")
|
|
|
606 |
|
607 |
os.environ["USER"] = os.getenv("USER", "user")
|
608 |
args = parser.parse_args()
|
@@ -611,4 +614,4 @@ if __name__ == "__main__":
|
|
611 |
MODELS_PATH = args.models_path
|
612 |
|
613 |
app = get_app()
|
614 |
-
app.queue().launch(share=args.share, server_port=args.port)
|
|
|
63 |
def transcribe(self, audio_path):
|
64 |
return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
|
65 |
|
66 |
+
|
67 |
class WhisperxModel:
|
68 |
def __init__(self, model_name, align_model: WhisperxAlignModel):
|
69 |
from whisperx import load_model
|
|
|
100 |
|
101 |
encodec_fn = f"{MODELS_PATH}/encodec_4cb2048_giga.th"
|
102 |
if not os.path.exists(encodec_fn):
|
103 |
+
os.system(f"wget https://huggingface.co/pyp1/VoiceCraft/resolve/main/encodec_4cb2048_giga.th -O " + encodec_fn)
|
104 |
|
105 |
voicecraft_model = {
|
106 |
"config": config,
|
|
|
114 |
|
115 |
def get_transcribe_state(segments):
|
116 |
words_info = [word_info for segment in segments for word_info in segment["words"]]
|
117 |
+
transcript = " ".join([segment["text"] for segment in segments])
|
118 |
+
transcript = transcript[1:] if transcript[0] == " " else transcript
|
119 |
return {
|
120 |
"segments": segments,
|
121 |
+
"transcript": transcript,
|
122 |
"words_info": words_info,
|
123 |
"transcript_with_start_time": " ".join([f"{word['start']} {word['word']}" for word in words_info]),
|
124 |
"transcript_with_end_time": " ".join([f"{word['word']} {word['end']}" for word in words_info]),
|
|
|
142 |
state
|
143 |
]
|
144 |
|
145 |
+
@spaces.GPU(duration=60)
|
146 |
def align_segments(transcript, audio_path):
|
147 |
from aeneas.executetask import ExecuteTask
|
148 |
from aeneas.task import Task
|
|
|
365 |
- In Edit mode write full prompt</br>
|
366 |
"""
|
367 |
|
368 |
+
demo_original_transcript = "But when I had approached so near to them, the common object, which the sense deceives, lost not by distance any of its marks."
|
369 |
|
370 |
demo_text = {
|
371 |
"TTS": {
|
|
|
605 |
parser.add_argument("--models-path", default="./pretrained_models", help="Path to voicecraft models directory")
|
606 |
parser.add_argument("--port", default=7860, type=int, help="App port")
|
607 |
parser.add_argument("--share", action="store_true", help="Launch with public url")
|
608 |
+
parser.add_argument("--server_name", default="127.0.0.1", type=str, help="Server name for launching the app. 127.0.0.1 for localhost; 0.0.0.0 to allow access from other machines in the local network. Might also give access to external users depends on the firewall settings.")
|
609 |
|
610 |
os.environ["USER"] = os.getenv("USER", "user")
|
611 |
args = parser.parse_args()
|
|
|
614 |
MODELS_PATH = args.models_path
|
615 |
|
616 |
app = get_app()
|
617 |
+
app.queue().launch(share=args.share, server_name=args.server_name, server_port=args.port)
|