ayaanzaveri committed on
Commit
9cdc5fa
1 Parent(s): f46c80d

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +38 -7
main.py CHANGED
@@ -1,13 +1,44 @@
 
1
  from fastapi import FastAPI
2
- from fastapi.responses import JSONResponse
 
 
 
3
 
4
  app = FastAPI()
 
5
 
6
- @app.get("/")
7
- def read_root():
8
- return {"Hello": "World!"}
 
9
 
 
 
 
 
 
10
 
11
- @app.get("/json")
12
- def read_json():
13
- return JSONResponse(content={"Hello": "World!"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from faster_whisper import WhisperModel
2
  from fastapi import FastAPI
3
+ from video import download_convert_video_to_audio
4
+ import yt_dlp
5
+ import uuid
6
+ import os
7
 
8
  app = FastAPI()
9
# Whisper model size/name handed to WhisperModel below.
model_size = "tiny"

# or run on GPU with INT8
# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
# or run on CPU with INT8
# Built once at import time; the /transcribe handler in this file calls
# model.transcribe() on this shared instance.
model = WhisperModel(model_size, device="cpu", compute_type="int8")
15
 
16
def segment_to_dict(segment):
    """Convert a transcription segment record into a plain dict.

    Accepts either a namedtuple-style record (anything exposing
    ``_asdict()``, which is all the previous implementation supported) or a
    dataclass instance. When the record carries a non-``None`` ``words``
    field, each entry of it is converted the same way, so the result holds
    only dicts/lists at the segment and word levels.

    Args:
        segment: Segment record, optionally with a ``words`` sequence of
            word records.

    Returns:
        dict: The segment's fields by name. ``words`` is a list of dicts when
        it was present and not ``None``; a ``None`` value is kept as-is and a
        missing ``words`` field is simply absent from the result.
    """
    import dataclasses  # local import keeps this block self-contained

    def _record_to_dict(record):
        # Dataclass instances are copied field by field (shallow) so nested
        # word records are converted explicitly below, exactly like the
        # namedtuple path.
        if dataclasses.is_dataclass(record) and not isinstance(record, type):
            return {
                field.name: getattr(record, field.name)
                for field in dataclasses.fields(record)
            }
        return dict(record._asdict())

    result = _record_to_dict(segment)
    words = result.get("words")
    if words is not None:
        result["words"] = [_record_to_dict(word) for word in words]
    return result
21
 
22
@app.post("/video")
async def download_video(video_url: str):
    """Download the video at ``video_url`` and convert it to audio.

    The output base path is ``/workspace/convo/videos/<random hex id>`` and is
    passed, together with the ``yt_dlp`` module and the URL, to
    ``download_convert_video_to_audio``.

    That helper is a synchronous call, so it is run on the event loop's
    default executor; calling it directly inside this coroutine would stall
    every other request until the download finished.

    Args:
        video_url: URL forwarded to ``download_convert_video_to_audio``.

    Returns:
        None.
    """
    import asyncio  # local import keeps this block self-contained

    # NOTE(review): the generated id is not returned, so the caller cannot
    # locate the resulting file; left unchanged to preserve the response.
    output_path = f"/workspace/convo/videos/{uuid.uuid4().hex}"
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(
        None, download_convert_video_to_audio, yt_dlp, video_url, output_path
    )
25
+
26
def _download_and_transcribe(video_url, beam_size):
    """Download a video's audio, transcribe it and return the segment dicts.

    Synchronous worker for ``transcribe_video``. The temporary ``.mp3`` is
    removed in a ``finally`` block so it does not accumulate on disk when the
    download or the transcription raises.
    """
    print("doing hex")
    rand_id = uuid.uuid4().hex
    base_path = f"/workspace/convo/videos/{rand_id}"
    # Presumably download_convert_video_to_audio writes "<base_path>.mp3";
    # this is the path the transcription step reads - confirm in video.py.
    audio_path = f"{base_path}.mp3"
    try:
        print("doing download")
        download_convert_video_to_audio(yt_dlp, video_url, base_path)
        segments, info = model.transcribe(
            audio_path, beam_size=beam_size, word_timestamps=True
        )
        # faster-whisper yields segments lazily, so the list must be fully
        # built while the audio file still exists (i.e. before cleanup).
        segments = [segment_to_dict(segment) for segment in segments]
        print(info)
    finally:
        try:
            os.remove(audio_path)
        except FileNotFoundError:
            # The download failed before producing the file; let the
            # original exception propagate instead of masking it.
            pass
    print(
        f"Detected language '{info.language}' "
        f"with probability {info.language_probability:f}"
    )
    return segments


@app.post("/transcribe")
async def transcribe_video(video_url: str, beam_size: int = 5):
    """Transcribe the audio of the video at ``video_url``.

    The download and the Whisper transcription are both synchronous calls, so
    they run on the event loop's default executor instead of blocking the
    loop for the duration of the request.

    Args:
        video_url: URL forwarded to ``download_convert_video_to_audio``.
        beam_size: Beam size passed to ``model.transcribe``.

    Returns:
        list[dict]: One dict per segment, as produced by ``segment_to_dict``
        (word timestamps are requested from the model).
    """
    import asyncio  # local import keeps this block self-contained

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None, _download_and_transcribe, video_url, beam_size
    )
40
+
41
+ # print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
42
+
43
+ # for segment in segments:
44
+ # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))