Update app.py
app.py CHANGED
@@ -2,70 +2,13 @@ import gradio as gr
 import os
 import cv2
 import numpy as np
-
+
 from moviepy.editor import *
 #from share_btn import community_icon_html, loading_icon_html, share_js
 
 
-os.system("python -m pip install git+https://github.com/MaureenZOU/detectron2-xyz.git")
-
-
-import torch
-import argparse
-
-from xdecoder.BaseModel import BaseModel
-from xdecoder import build_model
-from utils.distributed import init_distributed
-from utils.arguments import load_opt_from_config_files
-
-from tasks import *
-
-def parse_option():
-    parser = argparse.ArgumentParser('X-Decoder All-in-One Demo', add_help=False)
-    parser.add_argument('--conf_files', default="configs/xdecoder/svlp_focalt_lang.yaml", metavar="FILE", help='path to config file', )
-    args = parser.parse_args()
-
-    return args
-
-'''
-build args
-'''
-args = parse_option()
-opt = load_opt_from_config_files(args.conf_files)
-opt = init_distributed(opt)
-
-# META DATA
-pretrained_pth_last = os.path.join("xdecoder_focalt_last.pt")
-pretrained_pth_novg = os.path.join("xdecoder_focalt_last_novg.pt")
-
-if not os.path.exists(pretrained_pth_last):
-    os.system("wget {}".format("https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last.pt"))
-
-if not os.path.exists(pretrained_pth_novg):
-    os.system("wget {}".format("https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last_novg.pt"))
-
-
-'''
-build model
-'''
-model_last = BaseModel(opt, build_model(opt)).from_pretrained(pretrained_pth_last).eval().cuda()
-
-with torch.no_grad():
-    model_last.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(["background", "background"], is_eval=True)
-
-'''
-inference model
-'''
-
-@torch.no_grad()
-def xdecoder(image, instruction, *args, **kwargs):
-    image = Image.open(image)
-    image = image.convert("RGB")
-    with torch.autocast(device_type='cuda', dtype=torch.float16):
-        return referring_inpainting_gpt3(model_last, image, instruction, *args, **kwargs)
-
 
-
+xdecoder = gr.Interface.load(name="spaces/xdecoder/Instruct-X-Decoder")
 
 def get_frames(video_in):
     frames = []
@@ -129,13 +72,7 @@ def infer(prompt,video_in, trim_value):
     print("set stop frames to: " + str(n_frame))
 
     for i in frames_list[0:int(n_frame)]:
-
-        xdecoder_img = xdecoder(i, prompt)
-        #res_image = xdecoder_img[0]
-        #rgb_im = images[0].convert("RGB")
-
-        # exporting the image
-        #res_image.save(f"result_img-{i}.jpg")
+        xdecoder_img = xdecoder(i, prompt, fn_index=0)
         result_frames.append(xdecoder_img)
         print("frame " + i + "/" + str(n_frame) + ": done;")
 
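For context, this commit swaps the local X-Decoder setup (detectron2 install, checkpoint download, BaseModel/build_model, CUDA inference) for a remote call to the spaces/xdecoder/Instruct-X-Decoder Space. Below is a minimal sketch of that pattern, assuming Gradio 3.x, where gr.Interface.load returns a callable proxy and fn_index=0 selects the Space's first prediction endpoint; the edit_frame helper is illustrative and is not part of app.py itself.

import gradio as gr

# Remote proxy for the Instruct-X-Decoder Space (as in the diff);
# no local model weights or GPU are needed on this side.
xdecoder = gr.Interface.load(name="spaces/xdecoder/Instruct-X-Decoder")

def edit_frame(frame_path: str, prompt: str):
    # frame_path: path to one extracted video frame on disk.
    # prompt: the editing instruction forwarded to the Space.
    # fn_index=0 targets the first prediction function the Space exposes,
    # matching the xdecoder(i, prompt, fn_index=0) call added in the diff.
    return xdecoder(frame_path, prompt, fn_index=0)

The trade-off is that each frame now incurs a network round trip to the hosted Space instead of a local forward pass, but the app no longer has to install detectron2, fetch the xdecoder_focalt checkpoints, or run on a CUDA device itself.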