Spaces:
Paused
Paused
Update app_d.py
Browse files
app_d.py
CHANGED
@@ -5,7 +5,65 @@ import numpy as np
|
|
5 |
from moviepy.editor import *
|
6 |
#from share_btn import community_icon_html, loading_icon_html, share_js
|
7 |
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
def get_frames(video_in):
|
11 |
frames = []
|
@@ -69,7 +127,8 @@ def infer(prompt,video_in, trim_value):
|
|
69 |
print("set stop frames to: " + str(n_frame))
|
70 |
|
71 |
for i in frames_list[0:int(n_frame)]:
|
72 |
-
xdecoder_img = xdecoder(i, prompt, fn_index=0)
|
|
|
73 |
#res_image = xdecoder_img[0]
|
74 |
#rgb_im = images[0].convert("RGB")
|
75 |
|
|
|
5 |
from moviepy.editor import *
|
6 |
#from share_btn import community_icon_html, loading_icon_html, share_js
|
7 |
|
8 |
+
|
9 |
+
# Install the detectron2 fork at startup.  Run pip through the interpreter that
# is executing this script (sys.executable) instead of whatever `python`
# resolves to on PATH, so the package lands in the environment this process
# imports from.  The exit status is deliberately ignored, as before.
import subprocess
import sys

subprocess.run(
    [
        sys.executable or "python",
        "-m",
        "pip",
        "install",
        "git+https://github.com/MaureenZOU/detectron2-xyz.git",
    ],
    check=False,
)
|
10 |
+
|
11 |
+
|
12 |
+
import torch
|
13 |
+
import argparse
|
14 |
+
|
15 |
+
from xdecoder.BaseModel import BaseModel
|
16 |
+
from xdecoder import build_model
|
17 |
+
from utils.distributed import init_distributed
|
18 |
+
from utils.arguments import load_opt_from_config_files
|
19 |
+
|
20 |
+
from tasks import *
|
21 |
+
|
22 |
+
def parse_option(argv=None):
    """Parse the demo's command-line options.

    Args:
        argv: Optional list of argument strings.  When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, which matches the
            original zero-argument behaviour.

    Returns:
        argparse.Namespace with a ``conf_files`` attribute holding the path
        to the config file.
    """
    parser = argparse.ArgumentParser('X-Decoder All-in-One Demo', add_help=False)
    parser.add_argument(
        '--conf_files',
        default="configs/xdecoder/svlp_focalt_lang.yaml",
        metavar="FILE",
        help='path to config file',
    )
    return parser.parse_args(argv)
|
28 |
+
|
29 |
+
# Build args: parse the command-line options, load the config file(s) they
# name, then hand the loaded options to init_distributed.
args = parse_option()
opt = init_distributed(load_opt_from_config_files(args.conf_files))
|
35 |
+
|
36 |
+
# META DATA: checkpoint file names, relative to the current working directory.
pretrained_pth_last = "xdecoder_focalt_last.pt"
pretrained_pth_novg = "xdecoder_focalt_last_novg.pt"

# Download each checkpoint with wget only when nothing exists at its path yet.
# The exit status of wget is not checked.
if not os.path.exists(pretrained_pth_last):
    os.system(
        "wget {}".format(
            "https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last.pt"
        )
    )

if not os.path.exists(pretrained_pth_novg):
    os.system(
        "wget {}".format(
            "https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last_novg.pt"
        )
    )
|
45 |
+
|
46 |
+
|
47 |
+
# Build model: wrap the network built from `opt` in BaseModel, load the
# checkpoint at `pretrained_pth_last`, switch to eval mode and move to CUDA.
model_last = (
    BaseModel(opt, build_model(opt))
    .from_pretrained(pretrained_pth_last)
    .eval()
    .cuda()
)

# Call the language encoder's get_text_embeddings once, gradients disabled,
# with a two-entry "background" list.
# NOTE(review): presumably this seeds default text embeddings on the model
# before the first request; confirm against the xdecoder package.
with torch.no_grad():
    model_last.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(
        ["background", "background"],
        is_eval=True,
    )
|
54 |
+
|
55 |
+
'''
|
56 |
+
inference model
|
57 |
+
'''
|
58 |
+
|
59 |
+
@torch.no_grad()
def xdecoder(image, instruction, *args, **kwargs):
    """Run ``referring_inpainting_gpt3`` on one image with ``model_last``.

    The image is first converted through its ``convert("RGB")`` method, then
    passed on together with ``instruction`` and any extra positional and
    keyword arguments.  The call runs with gradients disabled and inside a
    CUDA float16 autocast context.

    Returns:
        Whatever ``referring_inpainting_gpt3`` returns.
    """
    rgb_image = image.convert("RGB")
    half_precision = torch.autocast(device_type='cuda', dtype=torch.float16)
    with half_precision:
        result = referring_inpainting_gpt3(
            model_last, rgb_image, instruction, *args, **kwargs
        )
    return result
|
64 |
+
|
65 |
+
|
66 |
+
#xdecoder = gr.Interface.load(name="spaces/xdecoder/Instruct-X-Decoder")
|
67 |
|
68 |
def get_frames(video_in):
|
69 |
frames = []
|
|
|
127 |
print("set stop frames to: " + str(n_frame))
|
128 |
|
129 |
for i in frames_list[0:int(n_frame)]:
|
130 |
+
#xdecoder_img = xdecoder(i, prompt, fn_index=0)
|
131 |
+
xdecoder_img = xdecoder(i, prompt)
|
132 |
#res_image = xdecoder_img[0]
|
133 |
#rgb_im = images[0].convert("RGB")
|
134 |
|