Spaces:
Runtime error
Runtime error
2424
Browse files- .gitignore +2 -1
- app.py +14 -4
- gradio.ipynb +87 -37
- visualization.py +26 -21
.gitignore
CHANGED
@@ -139,4 +139,5 @@ hico_20160224_det
|
|
139 |
v-coco
|
140 |
|
141 |
# *.ipynb
|
142 |
-
vis_res
|
|
|
|
139 |
v-coco
|
140 |
|
141 |
# *.ipynb
|
142 |
+
vis_res
|
143 |
+
flagged
|
app.py
CHANGED
@@ -1,7 +1,17 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
2 |
|
3 |
-
def
|
4 |
-
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from visualization import visualization
|
3 |
+
# pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
|
4 |
+
# pipeline = pipeline(task="image-classification", model="jhp/hoi")
|
5 |
|
6 |
+
def predict(image,threshold,topk):
|
7 |
+
vis_img = visualization(image,threshold,topk)
|
8 |
+
return vis_img
|
9 |
|
10 |
+
gr.Interface(
|
11 |
+
predict,
|
12 |
+
inputs=[gr.Image(type='pil',label="input image"),
|
13 |
+
gr.Slider(0, 1, value=0.4, label="Threshold", info="Set detection score threshold between 0~1"),
|
14 |
+
gr.Number(value=5,label='Topk',info='Topk prediction')],
|
15 |
+
outputs= gr.Image(type="pil", label="hoi detection results"),
|
16 |
+
title="HOI detection",
|
17 |
+
).launch()
|
gradio.ipynb
CHANGED
@@ -2,38 +2,63 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"id": "531487e5-d72d-41be-b4ae-ccd9f8dc844e",
|
7 |
"metadata": {},
|
8 |
"outputs": [
|
9 |
{
|
10 |
-
"
|
11 |
-
"
|
12 |
-
"
|
13 |
-
|
14 |
-
"\
|
15 |
-
"\
|
16 |
-
"
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
"
|
22 |
-
|
23 |
-
|
24 |
-
"
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
"
|
36 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
]
|
38 |
}
|
39 |
],
|
@@ -42,25 +67,50 @@
|
|
42 |
"from transformers import pipeline\n",
|
43 |
"from visualization import visualization\n",
|
44 |
"# pipeline = pipeline(task=\"image-classification\", model=\"julien-c/hotdog-not-hotdog\")\n",
|
45 |
-
"pipeline = pipeline(task=\"image-classification\", model=\"jhp/hoi\")\n",
|
46 |
"\n",
|
47 |
-
"def predict(image):\n",
|
48 |
-
"
|
49 |
-
" return
|
50 |
"\n",
|
51 |
"gr.Interface(\n",
|
52 |
" predict,\n",
|
53 |
-
" inputs=gr.
|
54 |
-
"
|
55 |
-
"
|
56 |
-
"
|
|
|
|
|
57 |
]
|
58 |
},
|
59 |
{
|
60 |
"cell_type": "code",
|
61 |
-
"execution_count":
|
62 |
"id": "439a75e9-77e6-4932-9b9b-35e2d0b7a76b",
|
63 |
"metadata": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
"outputs": [],
|
65 |
"source": []
|
66 |
}
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
"id": "531487e5-d72d-41be-b4ae-ccd9f8dc844e",
|
7 |
"metadata": {},
|
8 |
"outputs": [
|
9 |
{
|
10 |
+
"name": "stdout",
|
11 |
+
"output_type": "stream",
|
12 |
+
"text": [
|
13 |
+
"Running on local URL: http://127.0.0.1:7860\n",
|
14 |
+
"Running on public URL: https://fc8effa414b728bb78.gradio.live\n",
|
15 |
+
"\n",
|
16 |
+
"This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
|
17 |
+
]
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"data": {
|
21 |
+
"text/html": [
|
22 |
+
"<div><iframe src=\"https://fc8effa414b728bb78.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
23 |
+
],
|
24 |
+
"text/plain": [
|
25 |
+
"<IPython.core.display.HTML object>"
|
26 |
+
]
|
27 |
+
},
|
28 |
+
"metadata": {},
|
29 |
+
"output_type": "display_data"
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"name": "stdout",
|
33 |
+
"output_type": "stream",
|
34 |
+
"text": [
|
35 |
+
"loading annotations into memory...\n",
|
36 |
+
"Done (t=1.67s)\n",
|
37 |
+
"creating index...\n",
|
38 |
+
"index created!\n",
|
39 |
+
"\n",
|
40 |
+
"[Logger] DETR Arguments:\n",
|
41 |
+
"\tlr: 0.0001\n",
|
42 |
+
"\tlr_backbone: 1e-05\n",
|
43 |
+
"\tlr_drop: 80\n",
|
44 |
+
"\tfrozen_weights: None\n",
|
45 |
+
"\tbackbone: resnet50\n",
|
46 |
+
"\tdilation: False\n",
|
47 |
+
"\tposition_embedding: sine\n",
|
48 |
+
"\tenc_layers: 6\n",
|
49 |
+
"\tdec_layers: 6\n",
|
50 |
+
"\tnum_queries: 100\n",
|
51 |
+
"\tdataset_file: vcoco\n",
|
52 |
+
"\n",
|
53 |
+
"[Logger] Number of params: 52413912\n"
|
54 |
+
]
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"name": "stderr",
|
58 |
+
"output_type": "stream",
|
59 |
+
"text": [
|
60 |
+
"/home/jihwan/CPC_HOTR/hotr/models/position_encoding.py:41: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n",
|
61 |
+
" dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)\n"
|
62 |
]
|
63 |
}
|
64 |
],
|
|
|
67 |
"from transformers import pipeline\n",
|
68 |
"from visualization import visualization\n",
|
69 |
"# pipeline = pipeline(task=\"image-classification\", model=\"julien-c/hotdog-not-hotdog\")\n",
|
70 |
+
"# pipeline = pipeline(task=\"image-classification\", model=\"jhp/hoi\")\n",
|
71 |
"\n",
|
72 |
+
"def predict(image,threshold,topk):\n",
|
73 |
+
" vis_img = visualization(image,threshold,topk)\n",
|
74 |
+
" return vis_img\n",
|
75 |
"\n",
|
76 |
"gr.Interface(\n",
|
77 |
" predict,\n",
|
78 |
+
" inputs=[gr.Image(type='pil',label=\"input image\"),\n",
|
79 |
+
" gr.Slider(0, 1, value=0.4, label=\"Threshold\", info=\"Set detection score threshold between 0~1\"),\n",
|
80 |
+
" gr.Number(value=5,label='Topk',info='Topk prediction')],\n",
|
81 |
+
" outputs= gr.Image(type=\"pil\", label=\"hoi detection results\"),\n",
|
82 |
+
" title=\"HOI detection\",\n",
|
83 |
+
").launch(share=True,debug=True)"
|
84 |
]
|
85 |
},
|
86 |
{
|
87 |
"cell_type": "code",
|
88 |
+
"execution_count": 1,
|
89 |
"id": "439a75e9-77e6-4932-9b9b-35e2d0b7a76b",
|
90 |
"metadata": {},
|
91 |
+
"outputs": [
|
92 |
+
{
|
93 |
+
"ename": "TypeError",
|
94 |
+
"evalue": "string indices must be integers",
|
95 |
+
"output_type": "error",
|
96 |
+
"traceback": [
|
97 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
98 |
+
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
99 |
+
"Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m a\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msdsd\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 2\u001b[0m \u001b[43ma\u001b[49m\u001b[43m[\u001b[49m\u001b[43m:\u001b[49m\u001b[43m,\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\n",
|
100 |
+
"\u001b[0;31mTypeError\u001b[0m: string indices must be integers"
|
101 |
+
]
|
102 |
+
}
|
103 |
+
],
|
104 |
+
"source": [
|
105 |
+
"a='sdsd'\n",
|
106 |
+
"a[:,:]\n"
|
107 |
+
]
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"cell_type": "code",
|
111 |
+
"execution_count": null,
|
112 |
+
"id": "96fc750d-1869-4c83-87ad-d4ef909bbddb",
|
113 |
+
"metadata": {},
|
114 |
"outputs": [],
|
115 |
"source": []
|
116 |
}
|
visualization.py
CHANGED
@@ -51,7 +51,7 @@ def change_format(results,valid_ids):
|
|
51 |
output_i['hoi_prediction'].append({'subject_id':hum,'object_id':k,'category_id':i+2,'score':verb[j][k]})
|
52 |
|
53 |
return output_i
|
54 |
-
def vis(args,id=294,return_img=False):
|
55 |
|
56 |
if args.frozen_weights is not None:
|
57 |
print("Freeze weights for detector")
|
@@ -116,8 +116,13 @@ def vis(args,id=294,return_img=False):
|
|
116 |
# if not args.video_vis:
|
117 |
# url='http://images.cocodataset.org/val2014/COCO_val2014_{}.jpg'.format(str(id).zfill(12))
|
118 |
# req = requests.get(url, stream=True, timeout=1, verify=False).raw
|
119 |
-
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
w,h=img.size
|
123 |
orig_size = torch.as_tensor([int(h), int(w)]).unsqueeze(0).to(device)
|
@@ -138,8 +143,9 @@ def vis(args,id=294,return_img=False):
|
|
138 |
|
139 |
vis_img=draw_img_vcoco(image,output_i,top_k=args.topk,threshold=args.threshold,color=builtin_meta.COCO_CATEGORIES)
|
140 |
plt.imshow(cv2.cvtColor(vis_img,cv2.COLOR_BGR2RGB))
|
|
|
141 |
if return_img:
|
142 |
-
return vis_img
|
143 |
else:
|
144 |
cv2.imwrite('./vis_res/vis1.jpg',vis_img)
|
145 |
|
@@ -203,33 +209,32 @@ def vis(args,id=294,return_img=False):
|
|
203 |
# vis(args,id)
|
204 |
|
205 |
# 230727 for huggingface
|
206 |
-
def visualization(
|
207 |
|
208 |
parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
|
209 |
-
parser.
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
parser.add_argument('--image_dir', default='', type=str)
|
214 |
-
args = parser.parse_args()
|
215 |
# checkpoint_dir= './checkpoints/vcoco/checkpoint.pth' if dataset_file=='vcoco' else './checkpoints/hico-det/hico_ft_q16.pth'
|
216 |
args.resume= './checkpoints/vcoco/checkpoint.pth'
|
217 |
-
with open('./v-coco/data/splits/vcoco_test.ids') as file:
|
218 |
-
|
219 |
-
# if not video_vis:
|
220 |
-
id = test_idxs[309]
|
221 |
# args = parser.parse_args()
|
222 |
-
|
223 |
-
|
224 |
# args.resume = checkpoint_dir
|
225 |
-
|
226 |
-
|
227 |
args.augpath_name = ['p2','p3','p4']
|
228 |
# args.path_id = 1
|
229 |
-
|
|
|
230 |
if args.output_dir:
|
231 |
Path(args.output_dir).mkdir(parents=True, exist_ok=True)
|
232 |
-
vis(args,return_img=
|
233 |
|
234 |
if __name__ == '__main__':
|
235 |
parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
|
|
|
51 |
output_i['hoi_prediction'].append({'subject_id':hum,'object_id':k,'category_id':i+2,'score':verb[j][k]})
|
52 |
|
53 |
return output_i
|
54 |
+
def vis(args,input_img=None,id=294,return_img=False):
|
55 |
|
56 |
if args.frozen_weights is not None:
|
57 |
print("Freeze weights for detector")
|
|
|
116 |
# if not args.video_vis:
|
117 |
# url='http://images.cocodataset.org/val2014/COCO_val2014_{}.jpg'.format(str(id).zfill(12))
|
118 |
# req = requests.get(url, stream=True, timeout=1, verify=False).raw
|
119 |
+
|
120 |
+
if input_img is None:
|
121 |
+
req = args.image_dir
|
122 |
+
img = Image.open(req).convert('RGB')
|
123 |
+
else:
|
124 |
+
# import pdb;pdb.set_trace()
|
125 |
+
img = input_img
|
126 |
|
127 |
w,h=img.size
|
128 |
orig_size = torch.as_tensor([int(h), int(w)]).unsqueeze(0).to(device)
|
|
|
143 |
|
144 |
vis_img=draw_img_vcoco(image,output_i,top_k=args.topk,threshold=args.threshold,color=builtin_meta.COCO_CATEGORIES)
|
145 |
plt.imshow(cv2.cvtColor(vis_img,cv2.COLOR_BGR2RGB))
|
146 |
+
# import pdb;pdb.set_trace()
|
147 |
if return_img:
|
148 |
+
return Image.fromarray(vis_img)
|
149 |
else:
|
150 |
cv2.imwrite('./vis_res/vis1.jpg',vis_img)
|
151 |
|
|
|
209 |
# vis(args,id)
|
210 |
|
211 |
# 230727 for huggingface
|
212 |
+
def visualization(input_img,threshold,topk):
|
213 |
|
214 |
parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
|
215 |
+
args = parser.parse_args(args=[])
|
216 |
+
args.threshold = threshold
|
217 |
+
args.topk = int(topk)
|
218 |
+
|
|
|
|
|
219 |
# checkpoint_dir= './checkpoints/vcoco/checkpoint.pth' if dataset_file=='vcoco' else './checkpoints/hico-det/hico_ft_q16.pth'
|
220 |
args.resume= './checkpoints/vcoco/checkpoint.pth'
|
221 |
+
# with open('./v-coco/data/splits/vcoco_test.ids') as file:
|
222 |
+
# test_idxs = [line.rstrip('\n') for line in file]
|
223 |
+
# # if not video_vis:
|
224 |
+
# id = test_idxs[309]
|
225 |
# args = parser.parse_args()
|
226 |
+
args.dataset_file = 'vcoco'
|
227 |
+
args.data_path = 'v-coco'
|
228 |
# args.resume = checkpoint_dir
|
229 |
+
args.num_hoi_queries = 16
|
230 |
+
args.temperature = 0.05
|
231 |
args.augpath_name = ['p2','p3','p4']
|
232 |
# args.path_id = 1
|
233 |
+
# args.threshold = threshold
|
234 |
+
# args.topk = topk
|
235 |
if args.output_dir:
|
236 |
Path(args.output_dir).mkdir(parents=True, exist_ok=True)
|
237 |
+
vis(args,input_img=input_img,return_img=True)
|
238 |
|
239 |
if __name__ == '__main__':
|
240 |
parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
|