maxiw committed on
Commit
560a2a7
Β·
verified Β·
1 Parent(s): 9b70520

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -11
app.py CHANGED
@@ -50,16 +50,16 @@ def rescale_bounding_boxes(bounding_boxes, original_width, original_height, scal
50
 
51
 
52
  @spaces.GPU
53
- def run_example(image, text_input, system_prompt, model_id="OS-Copilot/OS-Atlas-Base-7B"):
54
  model = models[model_id].eval()
55
  processor = processors[model_id]
56
-
57
  messages = [
58
  {
59
  "role": "user",
60
  "content": [
61
  {"type": "image", "image": f"data:image;base64,{image_to_base64(image)}"},
62
- {"type": "text", "text": text_input},
63
  ],
64
  }
65
  ]
@@ -108,8 +108,6 @@ css = """
108
  border: 1px solid #ccc;
109
  }
110
  """
111
- default_system_prompt = ""
112
-
113
  with gr.Blocks(css=css) as demo:
114
  gr.Markdown(
115
  """
@@ -120,7 +118,6 @@ with gr.Blocks(css=css) as demo:
120
  with gr.Column():
121
  input_img = gr.Image(label="Input Image", type="pil")
122
  model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="OS-Copilot/OS-Atlas-Base-7B")
123
- system_prompt = gr.Textbox(label="System Prompt", value=default_system_prompt)
124
  text_input = gr.Textbox(label="User Prompt")
125
  submit_btn = gr.Button(value="Submit")
126
  with gr.Column():
@@ -130,17 +127,15 @@ with gr.Blocks(css=css) as demo:
130
 
131
  gr.Examples(
132
  examples=[
133
- ["assets/image1.jpg", "detect goats", default_system_prompt],
134
- ["assets/image2.jpg", "detect blue button", default_system_prompt],
135
- ["assets/image3.jpg", "detect person on bike", default_system_prompt],
136
  ],
137
- inputs=[input_img, text_input, system_prompt],
138
  outputs=[model_output_text, parsed_boxes, annotated_image],
139
  fn=run_example,
140
  cache_examples=True,
141
  label="Try examples"
142
  )
143
 
144
- submit_btn.click(run_example, [input_img, text_input, system_prompt, model_selector], [model_output_text, parsed_boxes, annotated_image])
145
 
146
  demo.launch(debug=True)
 
50
 
51
 
52
  @spaces.GPU
53
+ def run_example(image, text_input, model_id="OS-Copilot/OS-Atlas-Base-7B"):
54
  model = models[model_id].eval()
55
  processor = processors[model_id]
56
+ prompt = f"In this UI screenshot, what is the position of the element corresponding to the command \"{text_input}\" (with bbox)?"
57
  messages = [
58
  {
59
  "role": "user",
60
  "content": [
61
  {"type": "image", "image": f"data:image;base64,{image_to_base64(image)}"},
62
+ {"type": "text", "text": prompt},
63
  ],
64
  }
65
  ]
 
108
  border: 1px solid #ccc;
109
  }
110
  """
 
 
111
  with gr.Blocks(css=css) as demo:
112
  gr.Markdown(
113
  """
 
118
  with gr.Column():
119
  input_img = gr.Image(label="Input Image", type="pil")
120
  model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="OS-Copilot/OS-Atlas-Base-7B")
 
121
  text_input = gr.Textbox(label="User Prompt")
122
  submit_btn = gr.Button(value="Submit")
123
  with gr.Column():
 
127
 
128
  gr.Examples(
129
  examples=[
130
+ ["assets/web_6f93090a-81f6-489e-bb35-1a2838b18c01.png", "select search textfield"],
 
 
131
  ],
132
+ inputs=[input_img, text_input],
133
  outputs=[model_output_text, parsed_boxes, annotated_image],
134
  fn=run_example,
135
  cache_examples=True,
136
  label="Try examples"
137
  )
138
 
139
+ submit_btn.click(run_example, [input_img, text_input, model_selector], [model_output_text, parsed_boxes, annotated_image])
140
 
141
  demo.launch(debug=True)