Spaces:

jadechoghari
/

ferret-demo

Running on Zero

App Files Files Community

jadechoghari committed on Oct 22, 2024

Commit

3bdab0b

1 Parent(s): 151137d

add final fixes

Browse files

Files changed (1) hide show

app.py +65 -19

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ model_name = 'Ferret-UI'
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 @spaces.GPU()
-def inference_with_gradio(chatbot, image, prompt, model_path, box=None):
     dir_path = os.path.dirname(image)
     # image_path = image
     # Define the directory where you want to save the image (current directory)
@@ -37,10 +37,11 @@ def inference_with_gradio(chatbot, image, prompt, model_path, box=None):
         image_dir=dir_path,
         prompt=prompt,
         model_path="jadechoghari/Ferret-UI-Gemma2b",
-        conv_mode=conv_mode,  # Default mode from the original function
-        # temperature=temperature,
-        # top_p=top_p,
-        # max_new_tokens=max_new_tokens,
         # stop=stop    # Assuming we want to process the image
         )
@@ -58,11 +59,11 @@ def inference_with_gradio(chatbot, image, prompt, model_path, box=None):
 def submit_chat(chatbot, text_input):
     response = ''
-    chatbot.append((text_input, response))
     return chatbot, ''
 def clear_chat():
-    return [], None, ""
 with open(f"{cur_dir}/logo.svg", "r", encoding="utf-8") as svg_file:
     svg_content = svg_file.read()
@@ -75,6 +76,42 @@ html = f"""
 </p>
 <center><font size=3><b>{model_name}</b> Demo: Upload an image, provide a prompt, and get insights using advanced AI models. <a href='https://huggingface.co/jadechoghari/Ferret-UI-Gemma2b'>😊 Huggingface</a></font></center>
 """
 latex_delimiters_set = [{
         "left": "\\(",
@@ -99,24 +136,33 @@ model_dropdown = gr.Dropdown(choices=[
 ], label="Model Path", value="jadechoghari/Ferret-UI-Gemma2b")
 bounding_box_input = gr.Textbox(placeholder="Optional bounding box (x1, y1, x2, y2)", label="Bounding Box (optional)")
 chatbot = gr.Chatbot(label="Chat with Ferret-UI", height=400, show_copy_button=True, latex_delimiters=latex_delimiters_set)
 with gr.Blocks(title=model_name, theme=gr.themes.Ocean()) as demo:
     gr.HTML(html)
     with gr.Row():
         with gr.Column(scale=3):
-            # gr.Examples(
-            #     examples=[
-            #         ["appstore_reminders.png", "Describe the image in details", "jadechoghari/Ferret-UI-Gemma2b", None],
-            #         ["appstore_reminders.png", "What's inside the selected region?", "jadechoghari/Ferret-UI-Gemma2b", "189, 906, 404, 970"],
-            #         ["appstore_reminders.png", "Where is the Game Tab?", "jadechoghari/Ferret-UI-Gemma2b", None],
-            #     ],
-            #     inputs=[image_input, text_input, model_dropdown, bounding_box_input]
-            # )
             image_input.render()
             text_input.render()
             model_dropdown.render()
             bounding_box_input.render()
         with gr.Column(scale=7):
             chatbot.render()
             with gr.Row():
@@ -124,12 +170,12 @@ with gr.Blocks(title=model_name, theme=gr.themes.Ocean()) as demo:
                 clear_btn = gr.Button("Clear", variant="secondary")
     send_click_event = send_btn.click(
-        inference_with_gradio, [chatbot, image_input, text_input, model_dropdown, bounding_box_input], chatbot
     ).then(submit_chat, [chatbot, text_input], [chatbot, text_input])
     submit_event = text_input.submit(
-        inference_with_gradio, [chatbot, image_input, text_input, model_dropdown, bounding_box_input], chatbot
     ).then(submit_chat, [chatbot, text_input], [chatbot, text_input])
-    clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input, bounding_box_input])
-demo.launch()

 cur_dir = os.path.dirname(os.path.abspath(__file__))
 @spaces.GPU()
+def inference_with_gradio(chatbot, image, prompt, model_path, box=None, temperature=0.2, top_p=0.7, max_new_tokens=512):
     dir_path = os.path.dirname(image)
     # image_path = image
     # Define the directory where you want to save the image (current directory)
         image_dir=dir_path,
         prompt=prompt,
         model_path="jadechoghari/Ferret-UI-Gemma2b",
+        conv_mode=conv_mode,
+        temperature=temperature,
+        top_p=top_p,
+        box=box,
+        max_new_tokens=max_new_tokens,
         # stop=stop    # Assuming we want to process the image
         )
 def submit_chat(chatbot, text_input):
+    """Pass the chat history through unchanged and return '' as the second output.
+
+    Chained with .then() after inference_with_gradio on both the send-button
+    click and the text-box submit; its outputs are wired to
+    [chatbot, text_input].
+    """
     response = ''
+    # Appending is disabled, so `response` above is currently unused.
+    # chatbot.append((text_input, response))
     return chatbot, ''
 def clear_chat():
+    """Return reset values for the seven components wired to the Clear button.
+
+    Order matches the `outputs` list of clear_btn.click: chatbot, image_input,
+    text_input, bounding_box_input, temperature_input, top_p_input,
+    max_new_tokens_input. The last three values equal the sliders' initial
+    `value=` settings (0.2, 0.7, 512).
+    """
+    return [], None, "", "", 0.2, 0.7, 512
 with open(f"{cur_dir}/logo.svg", "r", encoding="utf-8") as svg_file:
     svg_content = svg_file.read()
 </p>
 <center><font size=3><b>{model_name}</b> Demo: Upload an image, provide a prompt, and get insights using advanced AI models. <a href='https://huggingface.co/jadechoghari/Ferret-UI-Gemma2b'>😊 Huggingface</a></font></center>
 """
+# Read the local icon file as raw bytes.
+# NOTE(review): within this diff, image_data is referenced only by the
+# commented-out template below; the active `html` template loads the icon
+# from a URL instead. Confirm whether this read is still needed.
+with open(f"{cur_dir}/ferretui_icon.png", "rb") as image_file:
+    image_data = image_file.read()
+# NOTE(review): image_data is a bytes object (file opened with "rb") and bytes
+# has no .encode() method in Python 3, so the inline-base64 expression in the
+# disabled template below would need base64.b64encode(image_data) before it
+# could be re-enabled.
+# html = f"""
+# <p align="center">
+#     <img src='data:image/png;base64,{image_data.encode("base64").decode("utf-8")}' alt='Ferret-UI' style='width: 100px; vertical-align: middle; border-radius: 15px; box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1);'/>
+#     <span style="font-size: 2em; font-weight: bold; margin-left: 10px; vertical-align: middle;">{model_name}</span>
+# </p>
+# <center><font size=3><b>{model_name}</b> Demo: Upload an image, provide a prompt, and get insights using advanced AI models. <a href='https://huggingface.co/jadechoghari/Ferret-UI-Gemma2b'>😊 Huggingface</a></font></center>
+# """
+html = f"""
+<div style="text-align: center; padding: 20px;">
+    <div style="display: inline-block; background-color: #f5f5f7; padding: 20px; border-radius: 20px; box-shadow: 0px 6px 20px rgba(0, 0, 0, 0.1);">
+        <div style="display: flex; align-items: center;">
+            <img src='https://github.com/apple/ml-ferret/blob/main/ferretui/figs/ferretui_icon.png?raw=true' alt='Ferret-UI'
+                style='width: 80px; height: 80px; border-radius: 20px; box-shadow: 0px 8px 16px rgba(0, 0, 0, 0.2);'/>
+            <div style="margin-left: 15px;">
+                <h1 style="font-size: 2.8em; font-family: -apple-system, BlinkMacSystemFont, sans-serif; color: #1D1D1F;
+                font-weight: bold; margin-bottom: 0;"> {model_name}</h1>
+                <p style="font-size: 1.2em; color: #6e6e73; font-family: -apple-system, BlinkMacSystemFont, sans-serif; margin-top: 5px;">
+                    📱 Grounded Mobile UI Understanding with Multimodal LLMs.<br>
+                    A new MLLM tailored for enhanced understanding of mobile UI screens, equipped with referring, grounding, and reasoning capabilities.
+                </p>
+                <a href='https://huggingface.co/jadechoghari/Ferret-UI-Gemma2b' style='text-decoration: none;'>
+                    <button style="background-color: #007aff; color: white; font-size: 1.2em; padding: 10px 20px; border-radius: 10px; border: none; margin-top: 10px; box-shadow: 0px 4px 12px rgba(0, 122, 255, 0.4); cursor: pointer;">
+                        🤗 Try on Hugging Face
+                    </button>
+                </a>
+            </div>
+        </div>
+    </div>
+    <p style="font-size: 1.2em; color: #86868B; font-family: -apple-system, BlinkMacSystemFont, sans-serif; margin-top: 30px;">
+        We release two Ferret-UI checkpoints, built on gemma-2b and Llama-3-8B models respectively, for public exploration. 🚀
+    </p>
+</div>
+"""
 latex_delimiters_set = [{
         "left": "\\(",
 ], label="Model Path", value="jadechoghari/Ferret-UI-Gemma2b")
 bounding_box_input = gr.Textbox(placeholder="Optional bounding box (x1, y1, x2, y2)", label="Bounding Box (optional)")
+# Adding Sliders for temperature, top_p, and max_new_tokens
+# These are created before the gr.Blocks context and placed in the layout later
+# via .render(). Their values are passed to inference_with_gradio as the
+# temperature, top_p and max_new_tokens arguments, and clear_chat resets them
+# to the same initial values given here.
+temperature_input = gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=0.2, label="Temperature")
+top_p_input = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.7, label="Top P")
+max_new_tokens_input = gr.Slider(minimum=1, maximum=1024, step=1, value=512, label="Max New Tokens")
 chatbot = gr.Chatbot(label="Chat with Ferret-UI", height=400, show_copy_button=True, latex_delimiters=latex_delimiters_set)
 with gr.Blocks(title=model_name, theme=gr.themes.Ocean()) as demo:
     gr.HTML(html)
     with gr.Row():
         with gr.Column(scale=3):
             image_input.render()
             text_input.render()
             model_dropdown.render()
             bounding_box_input.render()
+            temperature_input.render()    # Render temperature input
+            top_p_input.render()          # Render top_p input
+            max_new_tokens_input.render()
+            gr.Examples(
+                examples=[
+                    ["appstore_reminders.png", "Describe the image in details", "jadechoghari/Ferret-UI-Gemma2b", None],
+                    ["appstore_reminders.png", "What's inside the selected region?", "jadechoghari/Ferret-UI-Gemma2b", "189, 906, 404, 970"],
+                    ["appstore_reminders.png", "Where is the Game Tab?", "jadechoghari/Ferret-UI-Gemma2b", None],
+                ],
+                inputs=[image_input, text_input, model_dropdown, bounding_box_input]
+            )
         with gr.Column(scale=7):
             chatbot.render()
             with gr.Row():
                 clear_btn = gr.Button("Clear", variant="secondary")
     send_click_event = send_btn.click(
+        inference_with_gradio, [chatbot, image_input, text_input, model_dropdown, bounding_box_input, temperature_input, top_p_input, max_new_tokens_input], chatbot
     ).then(submit_chat, [chatbot, text_input], [chatbot, text_input])
     submit_event = text_input.submit(
+        inference_with_gradio, [chatbot, image_input, text_input, model_dropdown, bounding_box_input, temperature_input, top_p_input, max_new_tokens_input], chatbot
     ).then(submit_chat, [chatbot, text_input], [chatbot, text_input])
+    clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input, bounding_box_input, temperature_input, top_p_input, max_new_tokens_input])
+demo.launch()