Spaces:

ayaanzaveri
/

faster-whisper-api

Build error

ayaanzaveri committed on Apr 4, 2023

Commit

778f982

•

1 Parent(s): 56811cc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -37,7 +37,7 @@ def segment_to_dict(segment):
     return segment
 def download_video(video_url: str):
-    download_convert_video_to_audio(yt_dlp, video_url, f"/content/{uuid.uuid4().hex}")
 def transcribe_video(video_url: str, word_timestamps: bool = True, model_size: str = "tiny"):
     print(word_timestamps)
@@ -47,8 +47,8 @@ def transcribe_video(video_url: str, word_timestamps: bool = True, model_size: s
     print("getting hex")
     rand_id = uuid.uuid4().hex
     print("doing download")
-    download_convert_video_to_audio(yt_dlp, video_url, f"/content/{rand_id}")
-    segments, info = model.transcribe(f"/content/{rand_id}.mp3", beam_size=5, word_timestamps=word_timestamps)
     segments = [segment_to_dict(segment) for segment in segments]
     total_duration = round(info.duration, 2)  # Same precision as the Whisper timestamps.
     with tqdm(total=total_duration, unit=" seconds") as pbar:
@@ -58,7 +58,7 @@ def transcribe_video(video_url: str, word_timestamps: bool = True, model_size: s
     print(pbar)
     print(info)
-    os.remove(f"/content/{rand_id}.mp3")
     print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
     print(segments)
     return segments

     return segment
 def download_video(video_url: str):
+    download_convert_video_to_audio(yt_dlp, video_url, f"{uuid.uuid4().hex}")
 def transcribe_video(video_url: str, word_timestamps: bool = True, model_size: str = "tiny"):
     print(word_timestamps)
     print("getting hex")
     rand_id = uuid.uuid4().hex
     print("doing download")
+    download_convert_video_to_audio(yt_dlp, video_url, f"{rand_id}")
+    segments, info = model.transcribe(f"{rand_id}.mp3", beam_size=5, word_timestamps=word_timestamps)
     segments = [segment_to_dict(segment) for segment in segments]
     total_duration = round(info.duration, 2)  # Same precision as the Whisper timestamps.
     with tqdm(total=total_duration, unit=" seconds") as pbar:
     print(pbar)
     print(info)
+    os.remove(f"{rand_id}.mp3")
     print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
     print(segments)
     return segments