jason-on-salt-a40 committed on
Commit
b1f4e2f
·
1 Parent(s): 579d79b

Fix leading-space error in the transcript. Fix encodec download path.

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -63,7 +63,7 @@ class WhisperModel:
63
  def transcribe(self, audio_path):
64
  return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
65
 
66
- @spaces.GPU(duration=120)
67
  class WhisperxModel:
68
  def __init__(self, model_name, align_model: WhisperxAlignModel):
69
  from whisperx import load_model
@@ -100,7 +100,7 @@ def load_models(whisper_backend_name, whisper_model_name, alignment_model_name,
100
 
101
  encodec_fn = f"{MODELS_PATH}/encodec_4cb2048_giga.th"
102
  if not os.path.exists(encodec_fn):
103
- os.system(f"wget https://huggingface.co/pyp1/VoiceCraft/resolve/main/encodec_4cb2048_giga.th")
104
 
105
  voicecraft_model = {
106
  "config": config,
@@ -114,9 +114,11 @@ def load_models(whisper_backend_name, whisper_model_name, alignment_model_name,
114
 
115
  def get_transcribe_state(segments):
116
  words_info = [word_info for segment in segments for word_info in segment["words"]]
 
 
117
  return {
118
  "segments": segments,
119
- "transcript": " ".join([segment["text"] for segment in segments]),
120
  "words_info": words_info,
121
  "transcript_with_start_time": " ".join([f"{word['start']} {word['word']}" for word in words_info]),
122
  "transcript_with_end_time": " ".join([f"{word['word']} {word['end']}" for word in words_info]),
@@ -140,7 +142,7 @@ def transcribe(seed, audio_path):
140
  state
141
  ]
142
 
143
-
144
  def align_segments(transcript, audio_path):
145
  from aeneas.executetask import ExecuteTask
146
  from aeneas.task import Task
@@ -363,7 +365,7 @@ If disabled, you should write the target transcript yourself:</br>
363
  - In Edit mode write full prompt</br>
364
  """
365
 
366
- demo_original_transcript = " But when I had approached so near to them, the common object, which the sense deceives, lost not by distance any of its marks."
367
 
368
  demo_text = {
369
  "TTS": {
@@ -603,6 +605,7 @@ if __name__ == "__main__":
603
  parser.add_argument("--models-path", default="./pretrained_models", help="Path to voicecraft models directory")
604
  parser.add_argument("--port", default=7860, type=int, help="App port")
605
  parser.add_argument("--share", action="store_true", help="Launch with public url")
 
606
 
607
  os.environ["USER"] = os.getenv("USER", "user")
608
  args = parser.parse_args()
@@ -611,4 +614,4 @@ if __name__ == "__main__":
611
  MODELS_PATH = args.models_path
612
 
613
  app = get_app()
614
- app.queue().launch(share=args.share, server_port=args.port)
 
63
  def transcribe(self, audio_path):
64
  return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
65
 
66
+
67
  class WhisperxModel:
68
  def __init__(self, model_name, align_model: WhisperxAlignModel):
69
  from whisperx import load_model
 
100
 
101
  encodec_fn = f"{MODELS_PATH}/encodec_4cb2048_giga.th"
102
  if not os.path.exists(encodec_fn):
103
+ os.system(f"wget https://huggingface.co/pyp1/VoiceCraft/resolve/main/encodec_4cb2048_giga.th -O " + encodec_fn)
104
 
105
  voicecraft_model = {
106
  "config": config,
 
114
 
115
  def get_transcribe_state(segments):
116
  words_info = [word_info for segment in segments for word_info in segment["words"]]
117
+ transcript = " ".join([segment["text"] for segment in segments])
118
+ transcript = transcript[1:] if transcript[0] == " " else transcript
119
  return {
120
  "segments": segments,
121
+ "transcript": transcript,
122
  "words_info": words_info,
123
  "transcript_with_start_time": " ".join([f"{word['start']} {word['word']}" for word in words_info]),
124
  "transcript_with_end_time": " ".join([f"{word['word']} {word['end']}" for word in words_info]),
 
142
  state
143
  ]
144
 
145
+ @spaces.GPU(duration=60)
146
  def align_segments(transcript, audio_path):
147
  from aeneas.executetask import ExecuteTask
148
  from aeneas.task import Task
 
365
  - In Edit mode write full prompt</br>
366
  """
367
 
368
+ demo_original_transcript = "But when I had approached so near to them, the common object, which the sense deceives, lost not by distance any of its marks."
369
 
370
  demo_text = {
371
  "TTS": {
 
605
  parser.add_argument("--models-path", default="./pretrained_models", help="Path to voicecraft models directory")
606
  parser.add_argument("--port", default=7860, type=int, help="App port")
607
  parser.add_argument("--share", action="store_true", help="Launch with public url")
608
+ parser.add_argument("--server_name", default="127.0.0.1", type=str, help="Server name for launching the app. 127.0.0.1 for localhost; 0.0.0.0 to allow access from other machines in the local network. Might also give access to external users depends on the firewall settings.")
609
 
610
  os.environ["USER"] = os.getenv("USER", "user")
611
  args = parser.parse_args()
 
614
  MODELS_PATH = args.models_path
615
 
616
  app = get_app()
617
+ app.queue().launch(share=args.share, server_name=args.server_name, server_port=args.port)