Spaces:
Runtime error
Runtime error
File size: 1,969 Bytes
2e5e07d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import torch
import ast
import os
import cv2 as cv
from PIL import Image, ImageDraw, ImageFont
from decord import VideoReader, cpu
import torchvision
import numpy as np
def captioning(en_prompt_file, zh_prompt_file, input_video_dir, output_video_dir):
    """Burn an English caption into every numerically named video of a directory.

    The prompt file must contain a Python literal (parsed with
    ``ast.literal_eval``) that iterates as mappings, each providing a
    ``"video fragment description"`` entry. Directory entries whose name
    starts with an integer before the first ``.`` are sorted by that integer;
    the i-th one is captioned with the i-th description and written to
    ``<output_video_dir>/<i>.mp4`` at 8 fps.

    Args:
        en_prompt_file: Path of the UTF-8 text file holding the descriptions.
        zh_prompt_file: Kept for interface compatibility; it is never read and
            no Chinese caption is drawn.
        input_video_dir: Directory scanned for input videos.
        output_video_dir: Directory for the captioned videos; created when
            it does not exist.

    Raises:
        IndexError: If there are more matching videos than descriptions.
    """
    with open(en_prompt_file, 'r', encoding='utf-8') as f:
        video_fragments = ast.literal_eval(f.read())
    prompt_list = [
        fragment["video fragment description"] for fragment in video_fragments
    ]

    # Keep only entries named after an integer index (e.g. "12.mp4"); anything
    # else in the directory is ignored.
    video_fnames = []
    for fname in os.listdir(input_video_dir):
        try:
            int(fname.split('.')[0])
        except ValueError:
            continue
        video_fnames.append(fname)
    video_fnames.sort(key=lambda name: int(name.split('.')[0]))

    os.makedirs(output_video_dir, exist_ok=True)

    font_face = cv.FONT_HERSHEY_COMPLEX
    text_color = (255, 255, 255)
    thickness = 1
    # NOTE(review): the text baseline is fixed at y=300, so frames shorter
    # than that would show no caption -- confirm the expected resolution.
    baseline_y = 300

    for i, fname in enumerate(video_fnames):
        prompt = prompt_list[i]
        reader = VideoReader(os.path.join(input_video_dir, fname), ctx=cpu(0))
        video = reader[:].asnumpy()

        frame_width = video[0].shape[1]
        font_scale = 0.4
        (text_width, _), _ = cv.getTextSize(prompt, font_face, font_scale, thickness)

        # Centre the caption horizontally; when it is wider than the frame,
        # shrink the font so it spans the full width from the left edge.
        x = int((frame_width - text_width) / 2)
        if x < 0:
            font_scale *= frame_width / text_width
            x = 0

        # putText draws in place on each frame view of the video array.
        for frame in video:
            cv.putText(frame, prompt, (x, baseline_y), font_face, font_scale,
                       text_color, thickness, cv.LINE_AA)

        # Outputs are numbered by position in the sorted list, not by the
        # integer in the input file name.
        out_path = os.path.join(output_video_dir, str(i) + '.mp4')
        torchvision.io.write_video(out_path, video, fps=8)

    print("Caption OK", flush=True)
|