jadechoghari committed on
Commit
3bdab0b
•
1 Parent(s): 151137d

add final fixes

Browse files
Files changed (1) hide show
  1. app.py +65 -19
app.py CHANGED
@@ -9,7 +9,7 @@ model_name = 'Ferret-UI'
9
  cur_dir = os.path.dirname(os.path.abspath(__file__))
10
 
11
  @spaces.GPU()
12
- def inference_with_gradio(chatbot, image, prompt, model_path, box=None):
13
  dir_path = os.path.dirname(image)
14
  # image_path = image
15
  # Define the directory where you want to save the image (current directory)
@@ -37,10 +37,11 @@ def inference_with_gradio(chatbot, image, prompt, model_path, box=None):
37
  image_dir=dir_path,
38
  prompt=prompt,
39
  model_path="jadechoghari/Ferret-UI-Gemma2b",
40
- conv_mode=conv_mode, # Default mode from the original function
41
- # temperature=temperature,
42
- # top_p=top_p,
43
- # max_new_tokens=max_new_tokens,
 
44
  # stop=stop # Assuming we want to process the image
45
  )
46
 
@@ -58,11 +59,11 @@ def inference_with_gradio(chatbot, image, prompt, model_path, box=None):
58
 
59
  def submit_chat(chatbot, text_input):
60
  response = ''
61
- chatbot.append((text_input, response))
62
  return chatbot, ''
63
 
64
  def clear_chat():
65
- return [], None, ""
66
 
67
  with open(f"{cur_dir}/logo.svg", "r", encoding="utf-8") as svg_file:
68
  svg_content = svg_file.read()
@@ -75,6 +76,42 @@ html = f"""
75
  </p>
76
  <center><font size=3><b>{model_name}</b> Demo: Upload an image, provide a prompt, and get insights using advanced AI models. <a href='https://huggingface.co/jadechoghari/Ferret-UI-Gemma2b'>😊 Huggingface</a></font></center>
77
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  latex_delimiters_set = [{
80
  "left": "\\(",
@@ -99,24 +136,33 @@ model_dropdown = gr.Dropdown(choices=[
99
  ], label="Model Path", value="jadechoghari/Ferret-UI-Gemma2b")
100
 
101
  bounding_box_input = gr.Textbox(placeholder="Optional bounding box (x1, y1, x2, y2)", label="Bounding Box (optional)")
 
 
 
 
 
 
102
  chatbot = gr.Chatbot(label="Chat with Ferret-UI", height=400, show_copy_button=True, latex_delimiters=latex_delimiters_set)
103
 
104
  with gr.Blocks(title=model_name, theme=gr.themes.Ocean()) as demo:
105
  gr.HTML(html)
106
  with gr.Row():
107
  with gr.Column(scale=3):
108
- # gr.Examples(
109
- # examples=[
110
- # ["appstore_reminders.png", "Describe the image in details", "jadechoghari/Ferret-UI-Gemma2b", None],
111
- # ["appstore_reminders.png", "What's inside the selected region?", "jadechoghari/Ferret-UI-Gemma2b", "189, 906, 404, 970"],
112
- # ["appstore_reminders.png", "Where is the Game Tab?", "jadechoghari/Ferret-UI-Gemma2b", None],
113
- # ],
114
- # inputs=[image_input, text_input, model_dropdown, bounding_box_input]
115
- # )
116
  image_input.render()
117
  text_input.render()
118
  model_dropdown.render()
119
  bounding_box_input.render()
 
 
 
 
 
 
 
 
 
 
 
120
  with gr.Column(scale=7):
121
  chatbot.render()
122
  with gr.Row():
@@ -124,12 +170,12 @@ with gr.Blocks(title=model_name, theme=gr.themes.Ocean()) as demo:
124
  clear_btn = gr.Button("Clear", variant="secondary")
125
 
126
  send_click_event = send_btn.click(
127
- inference_with_gradio, [chatbot, image_input, text_input, model_dropdown, bounding_box_input], chatbot
128
  ).then(submit_chat, [chatbot, text_input], [chatbot, text_input])
129
  submit_event = text_input.submit(
130
- inference_with_gradio, [chatbot, image_input, text_input, model_dropdown, bounding_box_input], chatbot
131
  ).then(submit_chat, [chatbot, text_input], [chatbot, text_input])
132
 
133
- clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input, bounding_box_input])
134
 
135
- demo.launch()
 
9
  cur_dir = os.path.dirname(os.path.abspath(__file__))
10
 
11
  @spaces.GPU()
12
+ def inference_with_gradio(chatbot, image, prompt, model_path, box=None, temperature=0.2, top_p=0.7, max_new_tokens=512):
13
  dir_path = os.path.dirname(image)
14
  # image_path = image
15
  # Define the directory where you want to save the image (current directory)
 
37
  image_dir=dir_path,
38
  prompt=prompt,
39
  model_path="jadechoghari/Ferret-UI-Gemma2b",
40
+ conv_mode=conv_mode,
41
+ temperature=temperature,
42
+ top_p=top_p,
43
+ box=box,
44
+ max_new_tokens=max_new_tokens,
45
  # stop=stop # Assuming we want to process the image
46
  )
47
 
 
59
 
60
  def submit_chat(chatbot, text_input):
61
  response = ''
62
+ # chatbot.append((text_input, response))
63
  return chatbot, ''
64
 
65
  def clear_chat():
66
+ return [], None, "", "", 0.2, 0.7, 512
67
 
68
  with open(f"{cur_dir}/logo.svg", "r", encoding="utf-8") as svg_file:
69
  svg_content = svg_file.read()
 
76
  </p>
77
  <center><font size=3><b>{model_name}</b> Demo: Upload an image, provide a prompt, and get insights using advanced AI models. <a href='https://huggingface.co/jadechoghari/Ferret-UI-Gemma2b'>😊 Huggingface</a></font></center>
78
  """
79
+ with open(f"{cur_dir}/ferretui_icon.png", "rb") as image_file:
80
+ image_data = image_file.read()
81
+ # html = f"""
82
+ # <p align="center">
83
+ # <img src='data:image/png;base64,{image_data.encode("base64").decode("utf-8")}' alt='Ferret-UI' style='width: 100px; vertical-align: middle; border-radius: 15px; box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1);'/>
84
+ # <span style="font-size: 2em; font-weight: bold; margin-left: 10px; vertical-align: middle;">{model_name}</span>
85
+ # </p>
86
+ # <center><font size=3><b>{model_name}</b> Demo: Upload an image, provide a prompt, and get insights using advanced AI models. <a href='https://huggingface.co/jadechoghari/Ferret-UI-Gemma2b'>😊 Huggingface</a></font></center>
87
+ # """
88
+
89
+ html = f"""
90
+ <div style="text-align: center; padding: 20px;">
91
+ <div style="display: inline-block; background-color: #f5f5f7; padding: 20px; border-radius: 20px; box-shadow: 0px 6px 20px rgba(0, 0, 0, 0.1);">
92
+ <div style="display: flex; align-items: center;">
93
+ <img src='https://github.com/apple/ml-ferret/blob/main/ferretui/figs/ferretui_icon.png?raw=true' alt='Ferret-UI'
94
+ style='width: 80px; height: 80px; border-radius: 20px; box-shadow: 0px 8px 16px rgba(0, 0, 0, 0.2);'/>
95
+ <div style="margin-left: 15px;">
96
+ <h1 style="font-size: 2.8em; font-family: -apple-system, BlinkMacSystemFont, sans-serif; color: #1D1D1F;
97
+ font-weight: bold; margin-bottom: 0;"> {model_name}</h1>
98
+ <p style="font-size: 1.2em; color: #6e6e73; font-family: -apple-system, BlinkMacSystemFont, sans-serif; margin-top: 5px;">
99
+ 📱 Grounded Mobile UI Understanding with Multimodal LLMs.<br>
100
+ A new MLLM tailored for enhanced understanding of mobile UI screens, equipped with referring, grounding, and reasoning capabilities.
101
+ </p>
102
+ <a href='https://huggingface.co/jadechoghari/Ferret-UI-Gemma2b' style='text-decoration: none;'>
103
+ <button style="background-color: #007aff; color: white; font-size: 1.2em; padding: 10px 20px; border-radius: 10px; border: none; margin-top: 10px; box-shadow: 0px 4px 12px rgba(0, 122, 255, 0.4); cursor: pointer;">
104
+ 🤗 Try on Hugging Face
105
+ </button>
106
+ </a>
107
+ </div>
108
+ </div>
109
+ </div>
110
+ <p style="font-size: 1.2em; color: #86868B; font-family: -apple-system, BlinkMacSystemFont, sans-serif; margin-top: 30px;">
111
+ We release two Ferret-UI checkpoints, built on gemma-2b and Llama-3-8B models respectively, for public exploration. 🚀
112
+ </p>
113
+ </div>
114
+ """
115
 
116
  latex_delimiters_set = [{
117
  "left": "\\(",
 
136
  ], label="Model Path", value="jadechoghari/Ferret-UI-Gemma2b")
137
 
138
  bounding_box_input = gr.Textbox(placeholder="Optional bounding box (x1, y1, x2, y2)", label="Bounding Box (optional)")
139
+ # Adding Sliders for temperature, top_p, and max_new_tokens
140
+ temperature_input = gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=0.2, label="Temperature")
141
+ top_p_input = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.7, label="Top P")
142
+ max_new_tokens_input = gr.Slider(minimum=1, maximum=1024, step=1, value=512, label="Max New Tokens")
143
+
144
+
145
  chatbot = gr.Chatbot(label="Chat with Ferret-UI", height=400, show_copy_button=True, latex_delimiters=latex_delimiters_set)
146
 
147
  with gr.Blocks(title=model_name, theme=gr.themes.Ocean()) as demo:
148
  gr.HTML(html)
149
  with gr.Row():
150
  with gr.Column(scale=3):
 
 
 
 
 
 
 
 
151
  image_input.render()
152
  text_input.render()
153
  model_dropdown.render()
154
  bounding_box_input.render()
155
+ temperature_input.render() # Render temperature input
156
+ top_p_input.render() # Render top_p input
157
+ max_new_tokens_input.render()
158
+ gr.Examples(
159
+ examples=[
160
+ ["appstore_reminders.png", "Describe the image in details", "jadechoghari/Ferret-UI-Gemma2b", None],
161
+ ["appstore_reminders.png", "What's inside the selected region?", "jadechoghari/Ferret-UI-Gemma2b", "189, 906, 404, 970"],
162
+ ["appstore_reminders.png", "Where is the Game Tab?", "jadechoghari/Ferret-UI-Gemma2b", None],
163
+ ],
164
+ inputs=[image_input, text_input, model_dropdown, bounding_box_input]
165
+ )
166
  with gr.Column(scale=7):
167
  chatbot.render()
168
  with gr.Row():
 
170
  clear_btn = gr.Button("Clear", variant="secondary")
171
 
172
  send_click_event = send_btn.click(
173
+ inference_with_gradio, [chatbot, image_input, text_input, model_dropdown, bounding_box_input, temperature_input, top_p_input, max_new_tokens_input], chatbot
174
  ).then(submit_chat, [chatbot, text_input], [chatbot, text_input])
175
  submit_event = text_input.submit(
176
+ inference_with_gradio, [chatbot, image_input, text_input, model_dropdown, bounding_box_input, temperature_input, top_p_input, max_new_tokens_input], chatbot
177
  ).then(submit_chat, [chatbot, text_input], [chatbot, text_input])
178
 
179
+ clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input, bounding_box_input, temperature_input, top_p_input, max_new_tokens_input])
180
 
181
+ demo.launch()