ayaanzaveri committed on
Commit
56811cc
1 Parent(s): 5ae2523

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -11
app.py CHANGED
@@ -4,7 +4,7 @@ import yt_dlp
4
  import uuid
5
  import os
6
  import gradio as gr
7
-
8
 
9
  # List of all supported video sites here https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md
10
  def download_convert_video_to_audio(
@@ -21,7 +21,6 @@ def download_convert_video_to_audio(
21
  }
22
  ],
23
  "outtmpl": f"{destination_path}.%(ext)s",
24
- "concurrent-fragments": 128
25
  }
26
  try:
27
  print(f"Downloading video from {video_url}")
@@ -38,23 +37,28 @@ def segment_to_dict(segment):
38
  return segment
39
 
40
  def download_video(video_url: str):
41
- download_convert_video_to_audio(yt_dlp, video_url, f"{uuid.uuid4().hex}")
42
 
43
- def transcribe_video(video_url: str, beam_size: int = 5, model_size: str = "tiny", word_timestamps: bool = True):
 
44
  print("loading model")
45
  model = WhisperModel(model_size, device="cpu", compute_type="int8")
 
46
  print("getting hex")
47
  rand_id = uuid.uuid4().hex
48
  print("doing download")
49
- download_convert_video_to_audio(yt_dlp, video_url, f"{rand_id}")
50
- print("done download")
51
- print("doing transcribe")
52
- segments, info = model.transcribe(f"{rand_id}.mp3", beam_size=beam_size, word_timestamps=word_timestamps)
53
- print(info)
54
  segments = [segment_to_dict(segment) for segment in segments]
55
  total_duration = round(info.duration, 2) # Same precision as the Whisper timestamps.
 
 
 
 
 
 
56
  print(info)
57
- os.remove(f"{rand_id}.mp3")
58
  print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
59
  print(segments)
60
  return segments
@@ -64,6 +68,10 @@ def transcribe_video(video_url: str, beam_size: int = 5, model_size: str = "tiny
64
  # for segment in segments:
65
  # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
66
 
67
- demo = gr.Interface(fn=transcribe_video, inputs="text", outputs="json")
 
 
 
 
68
 
69
  demo.launch()
 
4
  import uuid
5
  import os
6
  import gradio as gr
7
+ from tqdm import tqdm
8
 
9
  # List of all supported video sites here https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md
10
  def download_convert_video_to_audio(
 
21
  }
22
  ],
23
  "outtmpl": f"{destination_path}.%(ext)s",
 
24
  }
25
  try:
26
  print(f"Downloading video from {video_url}")
 
37
  return segment
38
 
39
  def download_video(video_url: str):
40
+ download_convert_video_to_audio(yt_dlp, video_url, f"/content/{uuid.uuid4().hex}")
41
 
42
+ def transcribe_video(video_url: str, word_timestamps: bool = True, model_size: str = "tiny"):
43
+ print(word_timestamps)
44
  print("loading model")
45
  model = WhisperModel(model_size, device="cpu", compute_type="int8")
46
+ # model = WhisperModel(model_size, device="cuda", compute_type="float16")
47
  print("getting hex")
48
  rand_id = uuid.uuid4().hex
49
  print("doing download")
50
+ download_convert_video_to_audio(yt_dlp, video_url, f"/content/{rand_id}")
51
+ segments, info = model.transcribe(f"/content/{rand_id}.mp3", beam_size=5, word_timestamps=word_timestamps)
 
 
 
52
  segments = [segment_to_dict(segment) for segment in segments]
53
  total_duration = round(info.duration, 2) # Same precision as the Whisper timestamps.
54
+ with tqdm(total=total_duration, unit=" seconds") as pbar:
55
+ for segment in segments:
56
+ segment_duration = segment.end - segment.start
57
+ pbar.update(segment_duration)
58
+
59
+ print(pbar)
60
  print(info)
61
+ os.remove(f"/content/{rand_id}.mp3")
62
  print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
63
  print(segments)
64
  return segments
 
68
  # for segment in segments:
69
  # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
70
 
71
# Gradio front-end: a URL textbox, a word-timestamp toggle, and a model-size
# picker, wired straight into transcribe_video.
_ui_inputs = [
    gr.Textbox(label="Video URL"),
    gr.Checkbox(label="Word Timestamps", info="Do you want word timestamps in the response?"),
    gr.Dropdown(label="Model", value="tiny", choices=["tiny", "base", "small"]),
]
demo = gr.Interface(fn=transcribe_video, inputs=_ui_inputs, outputs="text")

demo.launch()