Update app_dialogue.py
app_dialogue.py (CHANGED, +44 -2)
@@ -115,7 +115,26 @@ def convert_to_rgb(filepath_or_pilimg):
 
     return temp_file_path  # Return the path to the saved image
 
+def pil_to_markdown_im(image):
+    """
+    Convert a PIL image into markdown filled with the base64 string representation.
+    """
+    print(f"***** pil_to_markdown_im ******")
+    print(f"params: image is - {image}")
+    #if isinstance(image, PIL.Image.Image):
+    #    img_b64_str = pil_to_base64(image)
+    #    img_str = f'<img src="data:image/png;base64,{img_b64_str}" />'
+    #if path_or_url.startswith(("http://", "https://")):
+    #    response = requests.get(image)
+    #    image = Image.open(BytesIO(response.content))
+    # Generate a unique filename using UUID
+    filename = f"{uuid.uuid4()}.jpg"
+    local_path = f"{filename}"
+    image.save(local_path)
+    img_str = f""
+    return img_str
 
+
 def base64_to_pil(encoded_image):
     decoded_image = base64.b64decode(encoded_image)
     pil_image = Image.open(BytesIO(decoded_image))
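The new `pil_to_markdown_im` helper saves the incoming image under a UUID filename but, as committed, returns an empty f-string (`img_str = f""`); the capture may have swallowed markup inside the string, so the intended return value is not recoverable from this diff. Below is a minimal sketch of what a working version might look like, assuming the goal is a markdown reference to the saved file; the `![](file=...)` form, the helper's `_sketch` suffix, and the RGB conversion are assumptions, not code from this commit:

```python
import uuid
from PIL import Image


def pil_to_markdown_im_sketch(image: Image.Image) -> str:
    """Save a PIL image locally and return a markdown reference to it.

    Hypothetical reconstruction: the committed version returns f"",
    so the exact string format here is assumed.
    """
    local_path = f"{uuid.uuid4()}.jpg"  # unique filename, as in the commit
    image = image.convert("RGB")        # JPEG cannot store an alpha channel
    image.save(local_path)
    # Gradio chat UIs commonly render local files referenced as
    # ![](file=...); this exact form is an assumption, not confirmed
    # by the diff.
    return f"![](file={local_path})"
```

Saving as `.jpg` would raise for RGBA input unless the image is converted first, which is why the sketch adds `convert("RGB")`.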
@@ -322,37 +341,60 @@ def format_user_prompt_with_im_history_and_system_conditioning(
     Produces the resulting list that needs to go inside the processor.
     It handles the potential image box input, the history and the system conditionning.
     """
+    print(f"*********format_user_prompt_with_im_history_and_system_conditioning*********")
+    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_user_prompt_str is - {current_user_prompt_str} ")
+    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_image is - {current_image} ")
+    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param history is - {history} ")
+
     resulting_list = copy.deepcopy(SYSTEM_PROMPT)
 
     # Format history
     for turn in history:
+        print(f"inside for loop, turn is - {turn}")
         user_utterance, assistant_utterance = turn
+        print("calling split_str_on_im_markdown from inside for loop inside format_user_prompt_with_im_history_and_system_conditioning")
         splitted_user_utterance = split_str_on_im_markdown(user_utterance)
+        print(f"splitted_user_utterance from split_str_on_im_markdown is - {splitted_user_utterance} ")
         splitted_user_utterance = [
             im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
             for s in splitted_user_utterance
             if s != ""
         ]
+        print(f"splitted_user_utterance after im_markdown_to_pil() is - {splitted_user_utterance} ")
+
         if isinstance(splitted_user_utterance[0], str):
             resulting_list.append("\nUser: ")
         else:
             resulting_list.append("\nUser:")
+        print(f"resulting_list after if..else block is - {resulting_list}")
         resulting_list.extend(splitted_user_utterance)
+        print(f"resulting_list after extend is - {resulting_list}")
         resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
+        print(f"resulting_list after append is - {resulting_list}")
+
 
     # Format current input
     current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
+    print(f"current_user_prompt_str is - {current_user_prompt_str}")
+
     if current_image is None:
+        print("inside IF : current_image is NONE")
         if "<img src=data:image/png;base64" in current_user_prompt_str:
             raise ValueError("The UI does not support inputing via the text box an image in base64.")
         current_user_prompt_list = handle_manual_images_in_user_prompt(current_user_prompt_str)
+        print(f"current_user_prompt_list (or [user_prompt]/resulting_user_prompt((most likely this one)) from handle_manual_images_in_user_prompt ) is - {current_user_prompt_list}")
         resulting_list.append("\nUser: ")
+        print(f"resulting_list with append user - {resulting_list}")
         resulting_list.extend(current_user_prompt_list)
+        print(f"resulting_list after extend with current_user_prompt_list is - {resulting_list}")
         resulting_list.append("<end_of_utterance>\nAssistant:")
+        print(f"resulting_list after append with end_of_utteranceAssistant is - {resulting_list}")
         return resulting_list, current_user_prompt_list
     else:
+        print("inside ELSE : current_image is not NONE")
         # Choosing to put the image first when the image is inputted through the UI, but this is an arbiratrary choice.
-        resulting_list.extend(["\nUser:", current_image, f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"])
+        resulting_list.extend(["\nUser:", Image.open(current_image), f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"])  #current_image
+        print(f"final resulting_list passed on to calling function is - {resulting_list}")
         return resulting_list, [current_user_prompt_str]
 
 
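Nearly all of this hunk is temporary `print` tracing; the one functional change is `current_image` becoming `Image.open(current_image)`. That pairs with the image box switching to `type="filepath"` in the next hunk: the UI now hands the handler a file path rather than a `PIL.Image`, so the image has to be opened before it is interleaved into the prompt list. A condensed sketch of the image-first branch follows; the `SYSTEM_PROMPT` placeholder and the `build_prompt_list` name are hypothetical, not from the Space:

```python
import copy
from PIL import Image

# Placeholder only -- the Space's real system prompt is not shown in this diff.
SYSTEM_PROMPT = ["<system>You are a helpful assistant.</system>"]


def build_prompt_list(current_user_prompt_str: str, current_image_path: str) -> list:
    """Sketch of the image-first branch: the prompt is a flat list mixing
    strings and PIL images, closed with the <end_of_utterance> marker."""
    resulting_list = copy.deepcopy(SYSTEM_PROMPT)
    # With gr.Image(type="filepath") the UI passes a str path, so the
    # image must be opened here before it reaches the processor.
    resulting_list.extend([
        "\nUser:",
        Image.open(current_image_path),
        f"{current_user_prompt_str}<end_of_utterance>\nAssistant:",
    ])
    return resulting_list
```

If the tracing is meant to stay, routing it through the `logging` module instead of bare `print` calls would let it be silenced in production without further edits.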
@@ -535,7 +577,7 @@ with gr.Blocks(title="IDEFICS-Chat", theme=gr.themes.Base()) as demo:
     )
     processor, tokenizer, model = load_processor_tokenizer_model(model_selector.value)
 
-    imagebox = gr.Image(type="
+    imagebox = gr.Image(type="filepath", label="Image input")
 
     with gr.Accordion("Advanced parameters", open=False, visible=True) as parameter_row:
         max_new_tokens = gr.Slider(