Spaces:
Running
Running
Niki Zhang
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -512,10 +512,6 @@ css = """
|
|
512 |
}
|
513 |
|
514 |
|
515 |
-
.image_upload {
|
516 |
-
height: 650px;
|
517 |
-
}
|
518 |
-
|
519 |
.info_btn {
|
520 |
background: white !important;
|
521 |
border: none !important;
|
@@ -569,23 +565,22 @@ prompt_list = [
|
|
569 |
'''
|
570 |
prompt_list = [
|
571 |
[
|
572 |
-
|
573 |
'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
574 |
-
'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption.
|
575 |
'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
576 |
-
'You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
|
577 |
],
|
578 |
[
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
'You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
|
583 |
],
|
584 |
[
|
585 |
-
'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
586 |
-
'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
587 |
-
'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
588 |
-
'You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
|
589 |
]
|
590 |
]
|
591 |
|
@@ -770,10 +765,14 @@ def update_click_state(click_state, caption, click_mode):
|
|
770 |
raise NotImplementedError
|
771 |
|
772 |
async def chat_input_callback(*args):
|
773 |
-
visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay,gender = args
|
774 |
message = chat_input["text"]
|
|
|
|
|
775 |
if visual_chatgpt is not None:
|
776 |
-
|
|
|
|
|
777 |
last_text, last_response = state[-1]
|
778 |
print("last response",last_response)
|
779 |
if autoplay==False:
|
@@ -886,7 +885,7 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
|
|
886 |
|
887 |
|
888 |
return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
|
889 |
-
original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist, gender]
|
890 |
|
891 |
|
892 |
|
@@ -965,7 +964,7 @@ query_focus = {
|
|
965 |
"D": "Provide a description of the item.",
|
966 |
"DA": "Provide a description and analysis of the item.",
|
967 |
"DAI": "Provide a description, analysis, and interpretation of the item.",
|
968 |
-
"
|
969 |
}
|
970 |
|
971 |
|
@@ -1029,18 +1028,18 @@ async def submit_caption(naritive, state,length, sentiment, factuality, language
|
|
1029 |
audio_output = await texttospeech(read_info, language, autoplay,gender)
|
1030 |
print("done")
|
1031 |
# return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
|
1032 |
-
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
|
1033 |
|
1034 |
except Exception as e:
|
1035 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
1036 |
print(f"Error during TTS prediction: {str(e)}")
|
1037 |
# return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
|
1038 |
-
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
|
1039 |
|
1040 |
else:
|
1041 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
1042 |
print(f"Error during TTS prediction: {str(e)}")
|
1043 |
-
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,None
|
1044 |
|
1045 |
|
1046 |
|
@@ -1090,30 +1089,39 @@ def get_gpt_response(api_key, image_path, prompt, enable_wiki=None):
|
|
1090 |
"Content-Type": "application/json",
|
1091 |
"Authorization": f"Bearer {api_key}"
|
1092 |
}
|
1093 |
-
|
|
|
1094 |
if image_path:
|
1095 |
-
|
1096 |
-
|
1097 |
-
|
1098 |
-
|
1099 |
-
|
1100 |
-
|
1101 |
-
|
1102 |
-
|
1103 |
-
|
1104 |
-
|
1105 |
-
|
1106 |
-
|
1107 |
-
|
1108 |
-
|
1109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1110 |
}
|
1111 |
-
|
1112 |
-
|
1113 |
-
|
1114 |
-
|
1115 |
-
|
1116 |
-
}
|
1117 |
else:
|
1118 |
payload = {
|
1119 |
"model": "gpt-4o",
|
@@ -1494,21 +1502,13 @@ async def texttospeech(text, language, autoplay,gender='female'):
|
|
1494 |
print(f"Error in texttospeech: {e}")
|
1495 |
return None
|
1496 |
|
1497 |
-
async def associate(focus_info,openai_api_key,language,state,autoplay,evt: gr.SelectData):
|
1498 |
rec_path=evt._data['value']['image']['path']
|
1499 |
print("rec_path",rec_path)
|
1500 |
prompt="""
|
1501 |
-
|
1502 |
-
- Artistic style and techniques
|
1503 |
-
- Themes and subjects
|
1504 |
-
- Color palettes and compositions
|
1505 |
-
- Historical and cultural contexts
|
1506 |
-
- Symbolism and meanings
|
1507 |
-
|
1508 |
-
Based on your analysis, provide insights into how the information enhances or contrasts with the recommended painting, and suggest any interesting interpretations or observations. Return your response in {language}
|
1509 |
-
|
1510 |
"""
|
1511 |
-
prompt=prompt.format(
|
1512 |
result=get_gpt_response(openai_api_key, rec_path, prompt)
|
1513 |
state = state + [(None, f"{result}")]
|
1514 |
read_info = re.sub(r'[#[\]!*]','',result)
|
@@ -1559,11 +1559,11 @@ def create_ui():
|
|
1559 |
|
1560 |
examples = [
|
1561 |
["test_images/ambass.jpg"],
|
1562 |
-
["test_images/test1.
|
1563 |
-
["test_images/test2.
|
1564 |
-
["test_images/test3.
|
1565 |
-
["test_images/test4.
|
1566 |
-
["test_images/test5.
|
1567 |
["test_images/Picture5.png"],
|
1568 |
|
1569 |
]
|
@@ -1597,7 +1597,7 @@ def create_ui():
|
|
1597 |
point_prompt = gr.State("Positive")
|
1598 |
log_list=gr.State([])
|
1599 |
gender=gr.State('female')
|
1600 |
-
|
1601 |
# with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
|
1602 |
# with gr.Column(scale=0.5):
|
1603 |
# # gr.Markdown("Left side content")
|
@@ -1648,7 +1648,7 @@ def create_ui():
|
|
1648 |
with gr.Column(scale=6):
|
1649 |
with gr.Column(visible=False) as modules_not_need_gpt:
|
1650 |
with gr.Tab("Base(GPT Power)") as base_tab:
|
1651 |
-
image_input_base = gr.Image(type="pil", interactive=True, elem_classes="image_upload")
|
1652 |
with gr.Row():
|
1653 |
name_label_base = gr.Button(value="Name: ",elem_classes="info_btn")
|
1654 |
artist_label_base = gr.Button(value="Artist: ",elem_classes="info_btn_interact")
|
@@ -1656,7 +1656,7 @@ def create_ui():
|
|
1656 |
material_label_base = gr.Button(value="Style: ",elem_classes="info_btn")
|
1657 |
|
1658 |
with gr.Tab("Base2") as base_tab2:
|
1659 |
-
image_input_base_2 = gr.Image(type="pil", interactive=True, elem_classes="image_upload")
|
1660 |
with gr.Row():
|
1661 |
name_label_base2 = gr.Button(value="Name: ",elem_classes="info_btn")
|
1662 |
artist_label_base2 = gr.Button(value="Artist: ",elem_classes="info_btn_interact")
|
@@ -1666,7 +1666,7 @@ def create_ui():
|
|
1666 |
with gr.Tab("Click") as click_tab:
|
1667 |
with gr.Row():
|
1668 |
with gr.Column(scale=10,min_width=600):
|
1669 |
-
image_input = gr.Image(type="pil", interactive=True, elem_classes="image_upload")
|
1670 |
example_image = gr.Image(type="pil", interactive=False, visible=False)
|
1671 |
with gr.Row():
|
1672 |
name_label = gr.Button(value="Name: ",elem_classes="info_btn")
|
@@ -1977,7 +1977,7 @@ def create_ui():
|
|
1977 |
|
1978 |
gallery_result.select(
|
1979 |
associate,
|
1980 |
-
inputs=[
|
1981 |
outputs=[chatbot,state,output_audio],
|
1982 |
|
1983 |
|
@@ -2243,19 +2243,19 @@ def create_ui():
|
|
2243 |
[chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
2244 |
image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
2245 |
name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
2246 |
-
paragraph,artist,gender])
|
2247 |
|
2248 |
-
|
2249 |
-
|
2250 |
-
|
2251 |
-
|
2252 |
-
|
2253 |
|
2254 |
-
|
2255 |
-
|
2256 |
-
|
2257 |
-
|
2258 |
-
|
2259 |
|
2260 |
# sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt,openai_api_key],
|
2261 |
# [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
@@ -2269,7 +2269,7 @@ def create_ui():
|
|
2269 |
# sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
|
2270 |
# [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
|
2271 |
# image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph,artist])
|
2272 |
-
chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play,gender],
|
2273 |
[chatbot, state, aux_state,output_audio])
|
2274 |
# chat_input.submit(lambda: "", None, chat_input)
|
2275 |
chat_input.submit(lambda: {"text": ""}, None, chat_input)
|
@@ -2280,7 +2280,7 @@ def create_ui():
|
|
2280 |
[chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
2281 |
image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
2282 |
name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
2283 |
-
paragraph,artist,gender])
|
2284 |
|
2285 |
example_image.change(clear_chat_memory, inputs=[visual_chatgpt])
|
2286 |
|
@@ -2331,7 +2331,7 @@ def create_ui():
|
|
2331 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph,focus_d,openai_api_key,new_crop_save_path,gender
|
2332 |
],
|
2333 |
outputs=[
|
2334 |
-
chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
|
2335 |
],
|
2336 |
show_progress=True,
|
2337 |
queue=True
|
|
|
512 |
}
|
513 |
|
514 |
|
|
|
|
|
|
|
|
|
515 |
.info_btn {
|
516 |
background: white !important;
|
517 |
border: none !important;
|
|
|
565 |
'''
|
566 |
prompt_list = [
|
567 |
[
|
|
|
568 |
'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
569 |
+
'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
570 |
'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
571 |
+
'Wiki_caption: {Wiki_caption},You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.'
|
572 |
],
|
573 |
[
|
574 |
+
"When generating the answer, you should tell others that you are one of the creators of these paintings and generate the text in the tone and manner as if you are the creator of the painting. When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact (describes the selected object but does not include analysis) as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.",
|
575 |
+
"When generating the answer, you should tell others that you are one of the creators of these paintings and generate the text in the tone and manner as if you are the creator of the painting. When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.",
|
576 |
+
"When generating the answer, you should tell others that you are one of the creators of these paintings and generate the text in the tone and manner as if you are the creator of the painting. When generating the answer, you should tell others that you are the creator of this painting and generate the text in the tone and manner as if you are the creator of this painting. You have to help me understand what is about the selected object and list one fact, one analysis, and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.",
|
577 |
+
'Wiki_caption: {Wiki_caption},You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
578 |
],
|
579 |
[
|
580 |
+
'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
581 |
+
'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
582 |
+
'When generating answers, you should tell people that you are the object or the person itself that was selected, and generate text in the tone and manner in which you are the object or the person. You have to help me understand what is about the selected object and list one fact and one analysis and one interpret as markdown outline with appropriate emojis that describes what you see according to the image and {Wiki_caption}. Please generate the above points in the tone and manner as if you are the object or the person and start every sentence with I. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.',
|
583 |
+
'Wiki_caption: {Wiki_caption},You have to help me understand what is about the selected object and list one object judgement and one whole art judgement(how successful do you think the artist was?) as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Please generate the above points in the tone and manner as if you are the creator of this painting and start every sentence with I. Each point listed is to be in {language} language, with a response length of about {length} words.'
|
584 |
]
|
585 |
]
|
586 |
|
|
|
765 |
raise NotImplementedError
|
766 |
|
767 |
async def chat_input_callback(*args):
|
768 |
+
visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay,gender,api_key,image_input = args
|
769 |
message = chat_input["text"]
|
770 |
+
prompt="Please help me answer the question with this painting."
|
771 |
+
state = state + [(message,None)]
|
772 |
if visual_chatgpt is not None:
|
773 |
+
result=get_gpt_response(api_key, image_input,prompt+message)
|
774 |
+
state = state + [(None, result)]
|
775 |
+
# state, _, aux_state, _ = visual_chatgpt.run_text(message, state, aux_state)
|
776 |
last_text, last_response = state[-1]
|
777 |
print("last response",last_response)
|
778 |
if autoplay==False:
|
|
|
885 |
|
886 |
|
887 |
return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
|
888 |
+
original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist, gender,new_image_path]
|
889 |
|
890 |
|
891 |
|
|
|
964 |
"D": "Provide a description of the item.",
|
965 |
"DA": "Provide a description and analysis of the item.",
|
966 |
"DAI": "Provide a description, analysis, and interpretation of the item.",
|
967 |
+
"Judge": "Evaluate the item."
|
968 |
}
|
969 |
|
970 |
|
|
|
1028 |
audio_output = await texttospeech(read_info, language, autoplay,gender)
|
1029 |
print("done")
|
1030 |
# return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
|
1031 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
|
1032 |
|
1033 |
except Exception as e:
|
1034 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
1035 |
print(f"Error during TTS prediction: {str(e)}")
|
1036 |
# return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
|
1037 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
|
1038 |
|
1039 |
else:
|
1040 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
1041 |
print(f"Error during TTS prediction: {str(e)}")
|
1042 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,None
|
1043 |
|
1044 |
|
1045 |
|
|
|
1089 |
"Content-Type": "application/json",
|
1090 |
"Authorization": f"Bearer {api_key}"
|
1091 |
}
|
1092 |
+
base64_images=[]
|
1093 |
+
|
1094 |
if image_path:
|
1095 |
+
if isinstance(image_path, list):
|
1096 |
+
|
1097 |
+
for img in image_path:
|
1098 |
+
base64_image = encode_image(img)
|
1099 |
+
base64_images.append(base64_image)
|
1100 |
+
else:
|
1101 |
+
base64_image = encode_image(image_path)
|
1102 |
+
base64_images.append(base64_image)
|
1103 |
+
|
1104 |
+
payload = {
|
1105 |
+
"model": "gpt-4o",
|
1106 |
+
"messages": [
|
1107 |
+
{
|
1108 |
+
"role": "user",
|
1109 |
+
"content": [
|
1110 |
+
{
|
1111 |
+
"type": "text",
|
1112 |
+
"text": prompt
|
1113 |
+
},
|
1114 |
+
{
|
1115 |
+
"type": "image_url",
|
1116 |
+
"image_url": {
|
1117 |
+
"url": f"data:image/jpeg;base64,{base64_images}"
|
1118 |
+
}
|
1119 |
}
|
1120 |
+
]
|
1121 |
+
}
|
1122 |
+
],
|
1123 |
+
"max_tokens": 300
|
1124 |
+
}
|
|
|
1125 |
else:
|
1126 |
payload = {
|
1127 |
"model": "gpt-4o",
|
|
|
1502 |
print(f"Error in texttospeech: {e}")
|
1503 |
return None
|
1504 |
|
1505 |
+
async def associate(focus_info,openai_api_key,language,state,autoplay,length, evt: gr.SelectData):
|
1506 |
rec_path=evt._data['value']['image']['path']
|
1507 |
print("rec_path",rec_path)
|
1508 |
prompt="""
|
1509 |
+
'Wiki_caption: {Wiki_caption}, you have to help me understand what is about the selected object and the objects in the second painting that may be related to the selected object and list one fact of selected object, one fact of related object in the second painting and one analysis between two objects as markdown outline with appropriate emojis that describes what you see according to the image and wiki caption. Each point listed is to be in {language} language, with a response length of about {length} words.'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1510 |
"""
|
1511 |
+
prompt=prompt.format(Wiki_caption=focus_info,language=language,length=length)
|
1512 |
result=get_gpt_response(openai_api_key, rec_path, prompt)
|
1513 |
state = state + [(None, f"{result}")]
|
1514 |
read_info = re.sub(r'[#[\]!*]','',result)
|
|
|
1559 |
|
1560 |
examples = [
|
1561 |
["test_images/ambass.jpg"],
|
1562 |
+
["test_images/test1.jpg"],
|
1563 |
+
["test_images/test2.jpg"],
|
1564 |
+
["test_images/test3.jpg"],
|
1565 |
+
["test_images/test4.jpg"],
|
1566 |
+
["test_images/test5.jpg"],
|
1567 |
["test_images/Picture5.png"],
|
1568 |
|
1569 |
]
|
|
|
1597 |
point_prompt = gr.State("Positive")
|
1598 |
log_list=gr.State([])
|
1599 |
gender=gr.State('female')
|
1600 |
+
image_path=gr.State('')
|
1601 |
# with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
|
1602 |
# with gr.Column(scale=0.5):
|
1603 |
# # gr.Markdown("Left side content")
|
|
|
1648 |
with gr.Column(scale=6):
|
1649 |
with gr.Column(visible=False) as modules_not_need_gpt:
|
1650 |
with gr.Tab("Base(GPT Power)") as base_tab:
|
1651 |
+
image_input_base = gr.Image(type="pil", interactive=True, elem_classes="image_upload",height=650)
|
1652 |
with gr.Row():
|
1653 |
name_label_base = gr.Button(value="Name: ",elem_classes="info_btn")
|
1654 |
artist_label_base = gr.Button(value="Artist: ",elem_classes="info_btn_interact")
|
|
|
1656 |
material_label_base = gr.Button(value="Style: ",elem_classes="info_btn")
|
1657 |
|
1658 |
with gr.Tab("Base2") as base_tab2:
|
1659 |
+
image_input_base_2 = gr.Image(type="pil", interactive=True, elem_classes="image_upload",height=650)
|
1660 |
with gr.Row():
|
1661 |
name_label_base2 = gr.Button(value="Name: ",elem_classes="info_btn")
|
1662 |
artist_label_base2 = gr.Button(value="Artist: ",elem_classes="info_btn_interact")
|
|
|
1666 |
with gr.Tab("Click") as click_tab:
|
1667 |
with gr.Row():
|
1668 |
with gr.Column(scale=10,min_width=600):
|
1669 |
+
image_input = gr.Image(type="pil", interactive=True, elem_classes="image_upload",height=650)
|
1670 |
example_image = gr.Image(type="pil", interactive=False, visible=False)
|
1671 |
with gr.Row():
|
1672 |
name_label = gr.Button(value="Name: ",elem_classes="info_btn")
|
|
|
1977 |
|
1978 |
gallery_result.select(
|
1979 |
associate,
|
1980 |
+
inputs=[paragraph,openai_api_key,language,state,auto_play,length],
|
1981 |
outputs=[chatbot,state,output_audio],
|
1982 |
|
1983 |
|
|
|
2243 |
[chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
2244 |
image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
2245 |
name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
2246 |
+
paragraph,artist,gender,image_path])
|
2247 |
|
2248 |
+
image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt,openai_api_key,language,naritive],
|
2249 |
+
[chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
2250 |
+
image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
2251 |
+
name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
2252 |
+
paragraph,artist,gender,image_path])
|
2253 |
|
2254 |
+
image_input.upload(upload_callback, [image_input, state, visual_chatgpt,openai_api_key,language,naritive],
|
2255 |
+
[chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
2256 |
+
image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
2257 |
+
name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
2258 |
+
paragraph,artist,gender,image_path])
|
2259 |
|
2260 |
# sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt,openai_api_key],
|
2261 |
# [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
|
|
2269 |
# sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
|
2270 |
# [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
|
2271 |
# image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph,artist])
|
2272 |
+
chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play,gender,openai_api_key,image_path],
|
2273 |
[chatbot, state, aux_state,output_audio])
|
2274 |
# chat_input.submit(lambda: "", None, chat_input)
|
2275 |
chat_input.submit(lambda: {"text": ""}, None, chat_input)
|
|
|
2280 |
[chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
2281 |
image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
2282 |
name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
2283 |
+
paragraph,artist,gender,image_path])
|
2284 |
|
2285 |
example_image.change(clear_chat_memory, inputs=[visual_chatgpt])
|
2286 |
|
|
|
2331 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph,focus_d,openai_api_key,new_crop_save_path,gender
|
2332 |
],
|
2333 |
outputs=[
|
2334 |
+
chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
|
2335 |
],
|
2336 |
show_progress=True,
|
2337 |
queue=True
|