ayaanzaveri committed on
Commit
4672515
1 Parent(s): 48b0c57
Files changed (1) hide show
  1. main.py +23 -18
main.py CHANGED
@@ -1,10 +1,12 @@
1
- from faster_whisper import WhisperModel
2
  from fastapi import FastAPI
3
- from video import download_convert_video_to_audio
 
4
  import yt_dlp
5
  import uuid
6
- import os
7
- from fastapi.middleware.cors import CORSMiddleware
 
 
8
 
9
  app = FastAPI()
10
 
@@ -16,9 +18,10 @@ app.add_middleware(
16
  allow_headers=["*"],
17
  )
18
 
19
- # or run on GPU with INT8
20
- # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
21
- # or run on CPU with INT8
 
22
 
23
  def segment_to_dict(segment):
24
  segment = segment._asdict()
@@ -28,26 +31,28 @@ def segment_to_dict(segment):
28
 
29
  @app.post("/video")
30
  async def download_video(video_url: str):
 
31
  download_convert_video_to_audio(yt_dlp, video_url, f"/home/user/{uuid.uuid4().hex}")
 
32
 
33
  @app.post("/transcribe")
34
  async def transcribe_video(video_url: str, beam_size: int = 5, model_size: str = "tiny", word_timestamps: bool = True):
35
- print("loading model")
36
  model = WhisperModel(model_size, device="cpu", compute_type="int8")
37
- print("getting hex")
 
38
  rand_id = uuid.uuid4().hex
39
- print("doing download")
40
  download_convert_video_to_audio(yt_dlp, video_url, f"/home/user/{rand_id}")
 
 
41
  segments, info = model.transcribe(f"/home/user/{rand_id}.mp3", beam_size=beam_size, word_timestamps=word_timestamps)
42
  segments = [segment_to_dict(segment) for segment in segments]
43
- total_duration = round(info.duration, 2) # Same precision as the Whisper timestamps.
44
- print(info)
45
  os.remove(f"/home/user/{rand_id}.mp3")
46
- print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
47
-
48
  return segments
49
 
50
- # print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
51
-
52
- # for segment in segments:
53
- # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
 
 
1
  from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ import os
4
  import yt_dlp
5
  import uuid
6
+ from typing import Dict
7
+
8
+ from faster_whisper import WhisperModel
9
+ from video import download_convert_video_to_audio
10
 
11
  app = FastAPI()
12
 
 
18
  allow_headers=["*"],
19
  )
20
 
21
+ # Create a dictionary to store the status of the current transcribing process
22
+ # This module-level dict is shared by every request handler and is updated as each step of a request progresses
23
+ # Each key names a step or result ("model", "video", "transcription", "language"), and each value is a human-readable string describing its current status
24
+ status: Dict[str, str] = {}
25
 
26
  def segment_to_dict(segment):
27
  segment = segment._asdict()
 
31
 
32
  @app.post("/video")
33
  async def download_video(video_url: str):
34
+ status["video"] = "Downloading..."
35
  download_convert_video_to_audio(yt_dlp, video_url, f"/home/user/{uuid.uuid4().hex}")
36
+ status["video"] = "Downloaded"
37
 
38
  @app.post("/transcribe")
39
  async def transcribe_video(video_url: str, beam_size: int = 5, model_size: str = "tiny", word_timestamps: bool = True):
40
+ status["model"] = "Loading..."
41
  model = WhisperModel(model_size, device="cpu", compute_type="int8")
42
+ status["model"] = "Loaded"
43
+ status["video"] = "Downloading..."
44
  rand_id = uuid.uuid4().hex
 
45
  download_convert_video_to_audio(yt_dlp, video_url, f"/home/user/{rand_id}")
46
+ status["video"] = "Downloaded"
47
+ status["transcription"] = "Transcribing..."
48
  segments, info = model.transcribe(f"/home/user/{rand_id}.mp3", beam_size=beam_size, word_timestamps=word_timestamps)
49
  segments = [segment_to_dict(segment) for segment in segments]
50
+ total_duration = round(info.duration, 2)
 
51
  os.remove(f"/home/user/{rand_id}.mp3")
52
+ status["transcription"] = "Completed"
53
+ status["language"] = "Detected language '%s' with probability %f" % (info.language, info.language_probability)
54
  return segments
55
 
56
+ @app.get("/status")
57
+ async def get_status():
58
+ return status