Spaces: Running on Zero
jadechoghari committed
Commit 35f0b0b
Parent(s): 4e961a2
fix bugs
Files changed:
- __pycache__/inference.cpython-310.pyc +0 -0
- __pycache__/model_UI.cpython-310.pyc +0 -0
- eval.json +1 -1
- eval_output.jsonl/0_of_1.jsonl +1 -1
- inference.py +10 -2
- model_UI.py +3 -1
__pycache__/inference.cpython-310.pyc
CHANGED
Binary files a/__pycache__/inference.cpython-310.pyc and b/__pycache__/inference.cpython-310.pyc differ
__pycache__/model_UI.cpython-310.pyc
CHANGED
Binary files a/__pycache__/model_UI.cpython-310.pyc and b/__pycache__/model_UI.cpython-310.pyc differ
eval.json
CHANGED
@@ -1 +1 @@
-[{"id": 0, "image": "temp_image.png", "image_h": 2532, "image_w": 1170, "conversations": [{"from": "human", "value": "<image>\
+[{"id": 0, "image": "temp_image.png", "image_h": 2532, "image_w": 1170, "conversations": [{"from": "human", "value": "<image>\nwhat you see <bbox_location0>"}], "box_x1y1x2y2": [[[446, 260, 1116, 443]]]}]
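For reference, inference.py (below) now writes this record with the box stored under "box_x1y1x2y2", nested as [[[x1, y1, x2, y2]]]. A minimal sketch, not part of the commit, that loads eval.json and sanity-checks that shape (field names come from the diff; the bounds checks are illustrative):

import json

with open("eval.json") as f:
    records = json.load(f)

for rec in records:
    assert {"id", "image", "image_h", "image_w", "conversations"} <= rec.keys()
    if "box_x1y1x2y2" in rec:
        (boxes,) = rec["box_x1y1x2y2"]   # unwrap the outer list
        for x1, y1, x2, y2 in boxes:     # one [x1, y1, x2, y2] per region
            assert 0 <= x1 <= x2 <= rec["image_w"]
            assert 0 <= y1 <= y2 <= rec["image_h"]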
eval_output.jsonl/0_of_1.jsonl
CHANGED
@@ -1 +1 @@
-{"id": 0, "image_path": "temp_image.png", "prompt": "
+{"id": 0, "image_path": "temp_image.png", "prompt": "what you see [381, 102, 953, 174] <region_fea>", "text": "Reminders, Don't forget. Use reminders.", "label": null}
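Note how the box surfaces in the prompt: [381, 102, 953, 174] is consistent with the eval.json box [446, 260, 1116, 443] rescaled into the model's vocabulary coordinate space, assuming VOCAB_IMAGE_W = VOCAB_IMAGE_H = 1000 as in the Ferret defaults: int(446 * 1000 / 1170) = 381, int(260 * 1000 / 2532) = 102, int(1116 * 1000 / 1170) = 953, int(443 * 1000 / 2532) = 174 (see the sketch after the model_UI.py hunk).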
inference.py
CHANGED
@@ -45,16 +45,24 @@ def inference_and_run(image_dir, image_path, prompt, conv_mode="ferret_gemma_ins
     """
     Run the inference and capture the errors for debugging.
     """
+
+
+    if box is not None:
+        conversation_value = f"<image>\n{prompt} <bbox_location0>"
+    else:
+        conversation_value = f"<image>\n{prompt}"
     data_input = [{
         "id": 0,
         "image": os.path.basename(image_path),
         "image_h": Image.open(image_path).height,
         "image_w": Image.open(image_path).width,
-        "conversations": [{"from": "human", "value":
+        "conversations": [{"from": "human", "value": conversation_value}]
     }]
 
     if box:
-
+        box_numbers = [int(float(coord)) for coord in box.split(", ")]
+        # Structure it in the desired format
+        data_input[0]["box_x1y1x2y2"] = [[box_numbers]]
 
     with open("eval.json", "w") as json_file:
         json.dump(data_input, json_file)
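Taken together, the change builds the eval record roughly like the standalone sketch below. This is not the committed function verbatim: the helper name is made up, the single Image.open is a tidying-up, and it assumes `box` arrives as a string such as "446, 260, 1116, 443".

import json
import os
from PIL import Image

def build_eval_record(image_path, prompt, box=None):
    # Append the <bbox_location0> placeholder only when a box is supplied.
    if box is not None:
        conversation_value = f"<image>\n{prompt} <bbox_location0>"
    else:
        conversation_value = f"<image>\n{prompt}"
    with Image.open(image_path) as img:  # open once rather than twice
        image_h, image_w = img.height, img.width
    record = {
        "id": 0,
        "image": os.path.basename(image_path),
        "image_h": image_h,
        "image_w": image_w,
        "conversations": [{"from": "human", "value": conversation_value}],
    }
    if box:
        # "446, 260, 1116, 443" -> [[[446, 260, 1116, 443]]]
        box_numbers = [int(float(coord)) for coord in box.split(", ")]
        record["box_x1y1x2y2"] = [[box_numbers]]
    return [record]

with open("eval.json", "w") as json_file:
    json.dump(build_eval_record("temp_image.png", "what you see",
                                box="446, 260, 1116, 443"), json_file)

One caveat in the committed parsing: box.split(", ") requires exactly a comma followed by a space, so "446,260,1116,443" would reach float() as a single token and raise ValueError; box.replace(",", " ").split() would be more forgiving.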
model_UI.py
CHANGED
@@ -122,7 +122,9 @@ class UIData:
             i['question'] = prompt
             i['region_masks'] = None
 
-            if self.task == 'box_in':
+            # if self.task == 'box_in':
+            # for the demo only
+            if self.args.region_format == 'box':
                 ratio_w = VOCAB_IMAGE_W * 1.0 / i['image_w']
                 ratio_h = VOCAB_IMAGE_H * 1.0 / i['image_h']
 
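The gate moves from the task name to the region_format argument, so the demo path always takes the box branch. The ratios map pixel coordinates into the fixed vocabulary image; a sketch of how they are presumably applied a few lines below the hunk (VOCAB_IMAGE_W = VOCAB_IMAGE_H = 1000 is an assumption, matching the Ferret defaults):

VOCAB_IMAGE_W = VOCAB_IMAGE_H = 1000  # assumed vocabulary image size

def scale_box_to_vocab(box, image_w, image_h):
    # Map a pixel-space [x1, y1, x2, y2] box into vocabulary coordinates.
    ratio_w = VOCAB_IMAGE_W * 1.0 / image_w
    ratio_h = VOCAB_IMAGE_H * 1.0 / image_h
    x1, y1, x2, y2 = box
    return [int(x1 * ratio_w), int(y1 * ratio_h),
            int(x2 * ratio_w), int(y2 * ratio_h)]

print(scale_box_to_vocab([446, 260, 1116, 443], 1170, 2532))
# -> [381, 102, 953, 174], the coordinates seen in eval_output.jsonl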