File size: 1,922 Bytes
5d656c3
f46c80d
5d656c3
9cdc5fa
 
5d656c3
 
f46c80d
 
da90409
 
 
 
 
 
 
 
 
5d656c3
 
 
f46c80d
9cdc5fa
 
 
 
 
f46c80d
9cdc5fa
 
a6dcf15
9cdc5fa
 
f115402
5d656c3
f115402
5d656c3
9cdc5fa
5d656c3
a6dcf15
a9f9972
9cdc5fa
5d656c3
 
a6dcf15
5d656c3
 
48b0c57
9cdc5fa
5d656c3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from faster_whisper import WhisperModel
from fastapi import FastAPI
from video import download_convert_video_to_audio
import yt_dlp
import uuid
import os
from fastapi.middleware.cors import CORSMiddleware

# Application object; the route decorators further down register endpoints on it.
app = FastAPI()

# CORS is configured fully open: every origin, method and header is allowed,
# and credentials are enabled as well.
# NOTE(review): FastAPI's CORS documentation states that wildcard values are not
# meant to be combined with allow_credentials=True — confirm whether credentials
# are actually needed, or list the allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# The Whisper model is created per request inside transcribe_video (CPU, INT8).
# To run on GPU with INT8 instead, construct it like this:
# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")

def segment_to_dict(segment):
    """Convert a namedtuple-like segment into a plain dict.

    The segment is expanded with ``_asdict()``. When its ``"words"`` entry is
    not ``None``, every word in it is expanded with ``_asdict()`` too, so the
    result contains only dicts and lists at those two levels.

    Args:
        segment: Object exposing ``_asdict()`` whose dict has a ``"words"`` key.

    Returns:
        The dict form of ``segment`` with ``"words"`` converted to a list of
        dicts (or left as ``None``).
    """
    as_dict = segment._asdict()
    words = as_dict["words"]
    if words is None:
        # Nothing nested to convert.
        return as_dict

    converted = []
    for word in words:
        converted.append(word._asdict())
    as_dict["words"] = converted
    return as_dict

@app.post("/video")
async def download_video(video_url: str):
    """Download the video at ``video_url`` and convert it to audio.

    The output location is a freshly generated random hex name under
    ``/home/user/``; the work itself is delegated to
    ``download_convert_video_to_audio``. Nothing is returned.
    """
    file_id = uuid.uuid4().hex
    target_base = f"/home/user/{file_id}"
    download_convert_video_to_audio(yt_dlp, video_url, target_base)

@app.post("/transcribe")
async def transcribe_video(video_url: str, beam_size: int = 5, model_size: str = "tiny", word_timestamps: bool = True):
    """Download a video's audio, transcribe it and return the segments.

    Args:
        video_url: URL handed to ``download_convert_video_to_audio``.
        beam_size: Forwarded to ``WhisperModel.transcribe``.
        model_size: Model identifier used to construct the ``WhisperModel``.
        word_timestamps: Forwarded to ``WhisperModel.transcribe``.

    Returns:
        A list with one dict per transcribed segment, as produced by
        ``segment_to_dict``.

    Raises:
        Any exception raised by the download helper or by the model is
        propagated; the temporary audio file is removed in either case.
    """
    print("loading model")
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    print("getting hex")
    rand_id = uuid.uuid4().hex
    audio_base = f"/home/user/{rand_id}"
    # The transcription below reads "<base>.mp3", so the helper is presumably
    # expected to write its output there — verify against video.py.
    audio_path = f"{audio_base}.mp3"
    try:
        print("doing download")
        download_convert_video_to_audio(yt_dlp, video_url, audio_base)
        segments, info = model.transcribe(audio_path, beam_size=beam_size, word_timestamps=word_timestamps)
        # Build the full list before the audio file is deleted.
        # NOTE(review): faster-whisper documents `segments` as a lazy generator,
        # so the file is likely still being read while this list is built.
        segments = [segment_to_dict(segment) for segment in segments]
        print(info)
    finally:
        # Always clean up the temporary audio, even when download or
        # transcription failed; a missing file (e.g. the download never
        # produced one) must not mask the original error.
        try:
            os.remove(audio_path)
        except FileNotFoundError:
            pass
    print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

    return segments

# print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

# for segment in segments:
#     print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))