Spaces:
Runtime error
Runtime error
File size: 2,314 Bytes
5d656c3 f46c80d 5d656c3 9cdc5fa 5d656c3 278edb5 63cf721 f46c80d da90409 63cf721 5d656c3 f46c80d 9cdc5fa f46c80d 63cf721 9cdc5fa a6dcf15 9cdc5fa f115402 5d656c3 f115402 5d656c3 9cdc5fa 5d656c3 a6dcf15 a9f9972 9cdc5fa 5d656c3 a6dcf15 5d656c3 48b0c57 9cdc5fa 5d656c3 278edb5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
from faster_whisper import WhisperModel
from fastapi import FastAPI
from video import download_convert_video_to_audio
import yt_dlp
import uuid
import os
from fastapi.middleware.cors import CORSMiddleware
import gradio as gr
from pyngrok import ngrok
# Application object that the route decorators further down register against.
app = FastAPI()

# CORS is fully permissive: any origin, method and header, with credentials.
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# open -- confirm this is intended before exposing the service publicly.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
# Credentials must not live in source control: the ngrok auth token is read
# from the NGROK_AUTHTOKEN environment variable instead of a string literal.
# NOTE(review): the token that used to be committed on this line is public now
# and should be revoked in the ngrok dashboard.
_ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if _ngrok_token:
    ngrok.set_auth_token(_ngrok_token)

# Terminate any ngrok process that is still running before opening a tunnel.
ngrok.kill()

# Tunnel to local port 7860 (presumably the port the Gradio UI listens on --
# confirm) and print the public address so it shows up in the logs.
ngrok_tunnel = ngrok.connect(7860)
print(ngrok_tunnel.public_url)
# To run on GPU with INT8 instead, construct the model like this:
# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
# The service itself runs on CPU with INT8 (see transcribe_video).
def _as_plain_dict(obj):
    """Return ``obj`` as a dict; accepts a dict, a dataclass instance or a namedtuple."""
    import dataclasses

    if isinstance(obj, dict):
        return obj
    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
        return dataclasses.asdict(obj)
    return obj._asdict()


def segment_to_dict(segment):
    """Convert a transcription segment, including its words, to plain dicts.

    Args:
        segment: An object exposing ``_asdict()`` (e.g. a namedtuple) or a
            dataclass instance. Its ``words`` entry may be ``None``, a list of
            such objects, or a list of dicts.

    Returns:
        A dict of the segment's fields. When ``words`` is not ``None`` every
        element of it is a dict as well.
    """
    data = _as_plain_dict(segment)
    words = data.get("words")
    if words is not None:
        # The conversion above may already have turned the words into dicts
        # (dataclasses.asdict recurses); only convert the ones that still need it.
        data["words"] = [_as_plain_dict(word) for word in words]
    return data
def ng_path(text):
    """Return the public URL of the ngrok tunnel; ``text`` is accepted but not used."""
    tunnel_url = ngrok_tunnel.public_url
    return tunnel_url
@app.post("/video")
async def download_video(video_url: str):
    """Download ``video_url`` and hand it to the audio conversion helper.

    The output goes to ``/home/user/<random hex>``; the helper presumably
    appends the audio extension itself (transcribe_video reads ``<path>.mp3``).

    Args:
        video_url: Address of the video to fetch.

    Returns:
        None. The generated path is not reported back to the caller.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this file.
    from fastapi.concurrency import run_in_threadpool

    target_path = f"/home/user/{uuid.uuid4().hex}"
    # The helper is a plain blocking call; running it in the thread pool keeps
    # the event loop free to serve other requests meanwhile.
    await run_in_threadpool(
        download_convert_video_to_audio, yt_dlp, video_url, target_path
    )
def _transcribe_blocking(video_url, beam_size, model_size, word_timestamps):
    """Download, transcribe and clean up synchronously; returns a list of segment dicts."""
    print("loading model")
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    print("getting hex")
    rand_id = uuid.uuid4().hex
    audio_path = f"/home/user/{rand_id}.mp3"
    print("doing download")
    try:
        download_convert_video_to_audio(yt_dlp, video_url, f"/home/user/{rand_id}")
        segments, info = model.transcribe(audio_path, beam_size=beam_size, word_timestamps=word_timestamps)
        # The segments have to be materialised while the audio file still
        # exists (faster-whisper produces them lazily), hence inside the try.
        segments = [segment_to_dict(segment) for segment in segments]
    finally:
        # Remove the temporary audio even when download or transcription
        # failed; the file may not exist if the download never completed.
        try:
            os.remove(audio_path)
        except FileNotFoundError:
            pass
    print(info)
    print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
    return segments


@app.post("/transcribe")
async def transcribe_video(video_url: str, beam_size: int = 5, model_size: str = "tiny", word_timestamps: bool = True):
    """Download a video, transcribe its audio and return the segments.

    Args:
        video_url: Address of the video to fetch.
        beam_size: Passed through to ``model.transcribe``.
        model_size: Passed to ``WhisperModel``; a new model is created on
            every request.
        word_timestamps: Passed through to ``model.transcribe``.

    Returns:
        A list with one dict per segment, as produced by ``segment_to_dict``.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this file.
    from fastapi.concurrency import run_in_threadpool

    # Model loading, download and transcription are all blocking; running them
    # in the thread pool keeps the event loop responsive.
    return await run_in_threadpool(
        _transcribe_blocking, video_url, beam_size, model_size, word_timestamps
    )
# print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
# for segment in segments:
# print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
# Minimal Gradio UI: whatever text is submitted, it answers with the value
# returned by ng_path.
interface = gr.Interface(
    fn=ng_path,
    inputs="text",
    outputs="text",
    title="X",
    description="XX",
    # Gradio expects one of "never" / "auto" / "manual" here; the boolean form
    # is deprecated, and "never" is the equivalent of the former False.
    allow_flagging="never",
)
interface.launch()