jadechoghari committed
Commit 35f0b0b
1 Parent(s): 4e961a2
__pycache__/inference.cpython-310.pyc CHANGED
Binary files a/__pycache__/inference.cpython-310.pyc and b/__pycache__/inference.cpython-310.pyc differ
 
__pycache__/model_UI.cpython-310.pyc CHANGED
Binary files a/__pycache__/model_UI.cpython-310.pyc and b/__pycache__/model_UI.cpython-310.pyc differ
 
eval.json CHANGED
@@ -1 +1 @@
- [{"id": 0, "image": "temp_image.png", "image_h": 2532, "image_w": 1170, "conversations": [{"from": "human", "value": "<image>\nclassify this"}], "box_x1y1x2y2": [["455.0, 513.0, 729.0, 650.0"]]}]
+ [{"id": 0, "image": "temp_image.png", "image_h": 2532, "image_w": 1170, "conversations": [{"from": "human", "value": "<image>\nwhat you see <bbox_location0>"}], "box_x1y1x2y2": [[[446, 260, 1116, 443]]]}]
eval_output.jsonl/0_of_1.jsonl CHANGED
@@ -1 +1 @@
- {"id": 0, "image_path": "temp_image.png", "prompt": "classify this", "text": "Today, 4+", "label": null}
+ {"id": 0, "image_path": "temp_image.png", "prompt": "what you see [381, 102, 953, 174] <region_fea>", "text": "Reminders, Don't forget. Use reminders.", "label": null}
inference.py CHANGED
@@ -45,16 +45,24 @@ def inference_and_run(image_dir, image_path, prompt, conv_mode="ferret_gemma_ins
     """
     Run the inference and capture the errors for debugging.
     """
+
+
+    if box is not None:
+        conversation_value = f"<image>\n{prompt} <bbox_location0>"
+    else:
+        conversation_value = f"<image>\n{prompt}"
     data_input = [{
         "id": 0,
         "image": os.path.basename(image_path),
         "image_h": Image.open(image_path).height,
         "image_w": Image.open(image_path).width,
-        "conversations": [{"from": "human", "value": f"<image>\n{prompt}"}]
+        "conversations": [{"from": "human", "value": conversation_value}]
     }]
 
     if box:
-        data_input[0]["box_x1y1x2y2"] = [[box]]
+        box_numbers = [int(float(coord)) for coord in box.split(", ")]
+        # Structure it in the desired format
+        data_input[0]["box_x1y1x2y2"] = [[box_numbers]]
 
     with open("eval.json", "w") as json_file:
         json.dump(data_input, json_file)
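
For reference, a standalone sketch of the parsing added in the `if box:` branch, assuming box arrives as the comma-and-space separated string the split targets; the parse_box name and sample value are illustrative, not part of the repo:

def parse_box(box: str) -> list[int]:
    # "446.0, 260.0, 1116.0, 443.0" -> [446, 260, 1116, 443]
    # int(float(...)) accepts both "446" and "446.0"
    return [int(float(coord)) for coord in box.split(", ")]

assert parse_box("446.0, 260.0, 1116.0, 443.0") == [446, 260, 1116, 443]

Note the split is on a literal ", ": an input without the space ("446,260,...") keeps the commas attached to the tokens and makes float() raise ValueError.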
model_UI.py CHANGED
@@ -122,7 +122,9 @@ class UIData:
         i['question'] = prompt
         i['region_masks'] = None
 
-        if self.task == 'box_in':
+        # if self.task == 'box_in':
+        # for the demo only
+        if self.args.region_format == 'box':
             ratio_w = VOCAB_IMAGE_W * 1.0 / i['image_w']
             ratio_h = VOCAB_IMAGE_H * 1.0 / i['image_h']
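
The hunk swaps the gate from self.task == 'box_in' to self.args.region_format == 'box' (flagged demo-only) but leaves the rescaling intact: pixel boxes are mapped into the fixed VOCAB_IMAGE_W x VOCAB_IMAGE_H grid used by the model's location tokens. A sketch of that mapping, assuming the Ferret-style constants VOCAB_IMAGE_W = VOCAB_IMAGE_H = 1000 and int truncation (an assumption, but one that reproduces the coordinates in eval_output.jsonl above):

# Assumed constants: 1000x1000 is the usual Ferret coordinate grid;
# verify against the definitions in model_UI.py.
VOCAB_IMAGE_W = 1000
VOCAB_IMAGE_H = 1000

def rescale_box(box, image_w, image_h):
    # Map a pixel-space [x1, y1, x2, y2] box into vocabulary coordinates.
    ratio_w = VOCAB_IMAGE_W * 1.0 / image_w
    ratio_h = VOCAB_IMAGE_H * 1.0 / image_h
    x1, y1, x2, y2 = box
    return [int(x1 * ratio_w), int(y1 * ratio_h),
            int(x2 * ratio_w), int(y2 * ratio_h)]

# This commit's box on the 1170x2532 screenshot:
print(rescale_box([446, 260, 1116, 443], 1170, 2532))  # [381, 102, 953, 174]

Those are exactly the numbers embedded in the eval_output.jsonl prompt ("what you see [381, 102, 953, 174] <region_fea>").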