TanelAlumae committed on
Commit
abba8cd
·
verified ·
1 Parent(s): fce98f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -11
app.py CHANGED
@@ -58,14 +58,32 @@ def convert_to_vtt(whisper_output):
58
 
59
  return vtt_output
60
 
61
- @spaces.GPU(duration=600)
62
- def transcribe(inputs):
 
 
 
 
 
 
 
 
 
63
  if inputs is None:
64
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
65
 
66
  result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe", "language": "et"}, return_timestamps=True)
67
  return convert_to_vtt(result)
68
 
 
 
 
 
 
 
 
 
 
69
 
70
  def _return_yt_html_embed(yt_url):
71
  video_id = yt_url.split("?v=")[-1]
@@ -106,20 +124,14 @@ def download_yt_audio(yt_url, filename):
106
  except youtube_dl.utils.ExtractorError as err:
107
  raise gr.Error(str(err))
108
 
109
- @spaces.GPU
110
  def yt_transcribe(yt_url, max_filesize=75.0):
111
 
112
  with tempfile.TemporaryDirectory() as tmpdirname:
113
  filepath = os.path.join(tmpdirname, "video.mp4")
114
  download_yt_audio(yt_url, filepath)
115
- with open(filepath, "rb") as f:
116
- inputs = f.read()
117
-
118
- inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
119
- inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
120
-
121
- result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe", "language": "et"}, return_timestamps=True)
122
- text = convert_to_vtt(result)
123
 
124
  return text
125
 
 
58
 
59
  return vtt_output
60
 
61
+
62
def dynamic_gpu_duration(func, duration, *args):
    """Run ``func(*args)`` under ``spaces.GPU`` with a per-call duration.

    The decorator is applied at call time, inside this function, so that
    ``duration`` can be computed per request instead of being fixed when the
    module is imported.

    Args:
        func: Callable executed inside the GPU-decorated wrapper.
        duration: Value forwarded to ``spaces.GPU(duration=...)``.
        *args: Positional arguments passed through to ``func``.

    Returns:
        Whatever ``func(*args)`` returns.
    """

    # @torch.inference_mode()
    @spaces.GPU(duration=duration)
    def wrapped_func():
        # Return the result directly. Using ``yield from`` here would turn the
        # wrapper into a generator: the caller would receive a generator
        # object, and a string result would be iterated character by
        # character instead of being handed back whole.
        return func(*args)

    return wrapped_func()
70
+
71
def do_transcribe(inputs):
    """Run the module-level ``pipe`` on ``inputs`` and convert the result.

    Args:
        inputs: Audio input handed to ``pipe`` unchanged.

    Returns:
        The value produced by ``convert_to_vtt`` for the pipeline result.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    generation_options = {"task": "transcribe", "language": "et"}
    whisper_output = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs=generation_options,
        return_timestamps=True,
    )
    return convert_to_vtt(whisper_output)
77
 
78
def transcribe(file_path):
    """Transcribe an audio file, sizing the GPU request from its length.

    The file is decoded once on the CPU only to measure its duration; the
    GPU-decorated call then receives the file path itself.

    Args:
        file_path: Path to the audio file to transcribe.

    Returns:
        The result of ``do_transcribe(file_path)``.

    Raises:
        gr.Error: If ``file_path`` is ``None``.
    """
    import math

    # Validate before touching the file so a missing upload produces the
    # user-facing message rather than a decoding error.
    if file_path is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # ffmpeg_read takes the raw file bytes plus a target sampling rate and
    # returns the decoded samples (it does not return a sampling rate).
    sampling_rate = pipe.feature_extractor.sampling_rate
    with open(file_path, "rb") as f:
        audio_data = ffmpeg_read(f.read(), sampling_rate)

    # Calculate the length in seconds
    # assumes audio_data is a 1-D sample array, so len() is the sample count
    audio_length = len(audio_data) / sampling_rate

    # Budget one fifth of the audio length, rounded up to whole seconds and
    # never less than one second, so very short clips still get a GPU slot.
    expected_transcribe_duration = max(1, math.ceil(audio_length / 5.0))
    gr.Info(f"Starting to transcribe, requesting a GPU for {expected_transcribe_duration} seconds")

    # The trailing argument is the input forwarded to do_transcribe.
    return dynamic_gpu_duration(do_transcribe, expected_transcribe_duration, file_path)
86
+
87
 
88
  def _return_yt_html_embed(yt_url):
89
  video_id = yt_url.split("?v=")[-1]
 
124
  except youtube_dl.utils.ExtractorError as err:
125
  raise gr.Error(str(err))
126
 
127
+
128
  def yt_transcribe(yt_url, max_filesize=75.0):
129
 
130
  with tempfile.TemporaryDirectory() as tmpdirname:
131
  filepath = os.path.join(tmpdirname, "video.mp4")
132
  download_yt_audio(yt_url, filepath)
133
+ text = transcribe(transcribe, filepath)
134
+
 
 
 
 
 
 
135
 
136
  return text
137