Spaces:

Vchitect
/

Vlogger-ShowMaker

Runtime error

File size: 1,969 Bytes

2e5e07d

import torch
import ast
import os
import cv2 as cv
from PIL import Image, ImageDraw, ImageFont
from decord import VideoReader, cpu
import torchvision
import numpy as np


def _caption_video(video_path, prompt, output_path):
    """Draw ``prompt`` onto every frame of one video and write the result.

    Args:
        video_path: Path of the video to read with decord.
        prompt: Caption text drawn on each frame with OpenCV.
        output_path: Destination file passed to ``torchvision.io.write_video``.
    """
    font_face = cv.FONT_HERSHEY_COMPLEX
    font_scale = 0.4

    # Decode the whole clip into a single numpy array.
    # NOTE(review): assumed layout is (frames, height, width, channels) uint8
    # as returned by decord -- confirm if another reader is ever substituted.
    frames = VideoReader(video_path, ctx=cpu(0))[:].asnumpy()
    frame_width = frames[0].shape[1]

    # Centre the caption horizontally; the baseline is fixed at y=300.
    (text_width, _), _ = cv.getTextSize(prompt, font_face, font_scale, 1)
    origin = (int((frame_width - text_width) / 2), 300)
    if origin[0] < 0:
        # Caption is wider than the frame: shrink the font by the overflow
        # ratio and start drawing at the left edge instead.
        font_scale *= frame_width / text_width
        origin = (0, 300)

    # Each ``frame`` is a view into ``frames``, so putText edits it in place.
    # The previous BGR->RGB->PIL->RGB->BGR round trip per frame was an
    # identity operation (nothing was drawn through PIL) and has been dropped.
    for frame in frames:
        cv.putText(frame, prompt, origin, font_face, font_scale,
                   (255, 255, 255), 1, cv.LINE_AA)

    torchvision.io.write_video(output_path, frames, fps=8)


def captioning(en_prompt_file, zh_prompt_file, input_video_dir, output_video_dir):
    """Burn English captions into the numbered videos of a directory.

    The prompt file must contain a Python literal: a sequence of dicts, each
    with a ``"video fragment description"`` key.  Files in
    ``input_video_dir`` whose name before the first ``.`` parses as an
    integer are processed in ascending numeric order; every other entry is
    ignored.  The i-th video (in that order) is captioned with the i-th
    description and written to ``<output_video_dir>/<i>.mp4`` at 8 fps.

    Args:
        en_prompt_file: Path to the UTF-8 file holding the prompt literal.
        zh_prompt_file: Unused; kept so existing callers keep working.
        input_video_dir: Directory containing the numbered input videos.
        output_video_dir: Directory for captioned videos; created if missing.

    Raises:
        IndexError: If there are more numbered videos than descriptions.
    """
    with open(en_prompt_file, 'r', encoding='utf-8') as f:
        video_fragments = ast.literal_eval(f.read())
    prompt_list = [
        fragment["video fragment description"] for fragment in video_fragments
    ]

    # Keep only entries with a numeric stem, remembering the parsed number so
    # it does not have to be parsed again for sorting.
    numbered = []
    for fname in os.listdir(input_video_dir):
        try:
            numbered.append((int(fname.split('.')[0]), fname))
        except ValueError:
            # Not a numbered video file; skip it.
            continue
    # Sort on the number only so ties keep their directory-listing order.
    numbered.sort(key=lambda item: item[0])

    os.makedirs(output_video_dir, exist_ok=True)
    for i, (_, fname) in enumerate(numbered):
        _caption_video(
            os.path.join(input_video_dir, fname),
            prompt_list[i],
            os.path.join(output_video_dir, str(i) + '.mp4'),
        )
    print("Caption OK", flush=True)