English
VideoCLIP-XL / modeling.py
jpWang's picture
initial commit
1622f6b
raw
history blame
431 Bytes
import os
from typing import List
import cv2
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from utils.text_encoder import text_encoder
from utils.vision_encoder import get_vision_encoder
class VideoCLIP_XL(nn.Module):
    """Module that bundles a text encoder and a vision encoder.

    The two sub-models are produced by the helpers imported from
    ``utils.text_encoder`` and ``utils.vision_encoder``; each one has
    ``.float()`` called on it before being stored on the instance.
    """

    def __init__(self):
        """Build both encoders and store them as ``text_model`` / ``vision_model``."""
        super().__init__()

        # Text branch: whatever ``text_encoder.load()`` returns, after ``.float()``.
        # NOTE(review): presumably an ``nn.Module`` cast to float32 — confirm
        # against ``utils.text_encoder``.
        text_branch = text_encoder.load()
        self.text_model = text_branch.float()

        # Vision branch: whatever ``get_vision_encoder()`` returns, after ``.float()``.
        vision_branch = get_vision_encoder()
        self.vision_model = vision_branch.float()