Niki Zhang committed: Update app.py

app.py CHANGED
@@ -25,7 +25,10 @@ from segment_anything import sam_model_registry
 import easyocr
 import re
 import edge_tts
+from langchain import __version__
 
+# Print the current version of LangChain
+print(f"Current LangChain version: {__version__}")
 # import tts
 
 ###############################################################################
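
Note: the added lines log the installed LangChain version at startup. A minimal equivalent that reads the package metadata instead of importing the package (a sketch, not part of the commit):

    from importlib.metadata import version

    # Query the installed distribution's metadata directly
    print(f"Current LangChain version: {version('langchain')}")
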
@@ -648,7 +651,9 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
     Human_prompt = f'\nHuman: The description of the image with path {new_image_path} is: {img_caption}. This information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
     AI_prompt = "Received."
     visual_chatgpt.global_prompt = Human_prompt + 'AI: ' + AI_prompt
-    visual_chatgpt.agent.memory.
+    visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
+    print("memory",visual_chatgpt.agent.memory)
+    # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
     parsed_data = get_image_gpt(openai_api_key, new_image_path,"Please provide the name, artist, year of creation, and material used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\",\"artist\": \"Name of the artist\", \"year\": \"Year of creation\", \"material\": \"Material used in the painting\" }.")
     parsed_data = json.loads(parsed_data.replace("'", "\""))
     name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["material"]
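
Note: this hunk completes the previously truncated visual_chatgpt.agent.memory. statement. In classic LangChain, conversation memory records one exchange per save_context call. A self-contained sketch of that behavior, assuming ConversationBufferMemory (the concrete memory class behind agent.memory is not shown in the diff):

    from langchain.memory import ConversationBufferMemory

    memory = ConversationBufferMemory()
    # Each call appends one Human/AI turn to the transcript
    memory.save_context({"input": "Describe the image."}, {"output": "Received."})
    # The rendered buffer is what the agent later sees as chat history
    print(memory.load_memory_variables({}))
    # -> {'history': 'Human: Describe the image.\nAI: Received.'}
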
@@ -756,7 +761,8 @@ async def submit_caption(state, text_refiner, length, sentiment, factuality, lan
     # input_points=input_points, input_labels=input_labels)
 
 
-    if not args.disable_gpt and text_refiner:
+    # if not args.disable_gpt and text_refiner:
+    if not args.disable_gpt:
         print("new crop save",new_crop_save_path)
         focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
         if focus_info.startswith('"') and focus_info.endswith('"'):
@@ -961,7 +967,8 @@ async def inference_traject(origin_image,sketcher_image, enable_wiki, language,
     sketcher_image['image']=image_input
 
 
-    if not args.disable_gpt and text_refiner:
+    # if not args.disable_gpt and text_refiner:
+    if not args.disable_gpt:
         focus_info=get_image_gpt(openai_api_key,crop_save_path,prompt)
         if focus_info.startswith('"') and focus_info.endswith('"'):
             focus_info=focus_info[1:-1]
@@ -1006,7 +1013,9 @@ def clear_chat_memory(visual_chatgpt, keep_global=False):
     visual_chatgpt.memory.clear()
     visual_chatgpt.point_prompt = ""
     if keep_global:
-        visual_chatgpt.agent.memory.buffer = visual_chatgpt.global_prompt
+        # visual_chatgpt.agent.memory.buffer = visual_chatgpt.global_prompt
+        visual_chatgpt.agent.memory.save_context({"input": visual_chatgpt.global_prompt}, {"output": None})
+        print("test")
     else:
         visual_chatgpt.current_image = None
         visual_chatgpt.global_prompt = ""
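
Note: here save_context is passed {"output": None}. LangChain stores the output as an AI message and later joins message contents into the buffer string, so a None output can fail validation or break rendering. A hedged alternative (an assumption, not the author's fix) records an empty reply instead:

    # Sketch: use an empty string so the AI turn stays a valid str
    visual_chatgpt.agent.memory.save_context(
        {"input": visual_chatgpt.global_prompt},
        {"output": ""},
    )
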
@@ -1054,7 +1063,9 @@ async def cap_everything(paragraph, visual_chatgpt,language,autoplay):
     Human_prompt = f'\nThe description of the image with path {visual_chatgpt.current_image} is:\n{paragraph}\nThis information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
     AI_prompt = "Received."
     visual_chatgpt.global_prompt = Human_prompt + 'AI: ' + AI_prompt
-
+
+    # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
+    visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
     # waveform_visual, audio_output=tts.predict(paragraph, input_language, input_audio, input_mic, use_mic, agree)
     audio_output=await texttospeech(paragraph,language,autoplay)
     return paragraph,audio_output
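
Note: texttospeech (defined elsewhere in app.py) builds on the edge_tts package imported at the top of the file. The core synthesis call looks like this sketch; the voice name is an assumption:

    import asyncio
    import edge_tts

    async def synthesize(text: str, path: str = "output.mp3") -> str:
        # Communicate streams synthesized speech; save() writes it to disk
        await edge_tts.Communicate(text, "en-US-JennyNeural").save(path)
        return path

    asyncio.run(synthesize("Received."))
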
@@ -1075,26 +1086,27 @@ def cap_everything_withoutsound(image_input, visual_chatgpt, text_refiner,paragr
     Human_prompt = f'\nThe description of the image with path {visual_chatgpt.current_image} is:\n{paragraph}\nThis information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
     AI_prompt = "Received."
     visual_chatgpt.global_prompt = Human_prompt + 'AI: ' + AI_prompt
-    visual_chatgpt.agent.memory.
+    visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
+    # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
     return paragraph
 
-def handle_liked(state,like_res):
-    if state:
-        like_res.append(state[-1][1])
-        print(f"Last response recorded: {state[-1][1]}")
-    else:
-        print("No response to record.")
-    state = state + [(None, f"Liked Received 👍")]
-    return state,like_res
+# def handle_liked(state,like_res):
+#     if state:
+#         like_res.append(state[-1][1])
+#         print(f"Last response recorded: {state[-1][1]}")
+#     else:
+#         print("No response to record.")
+#     state = state + [(None, f"Liked Received 👍")]
+#     return state,like_res
 
-def handle_disliked(state,dislike_res):
-    if state:
-        dislike_res.append(state[-1][1])
-        print(f"Last response recorded: {state[-1][1]}")
-    else:
-        print("No response to record.")
-    state = state + [(None, f"Disliked Received 🥹")]
-    return state,dislike_res
+# def handle_disliked(state,dislike_res):
+#     if state:
+#         dislike_res.append(state[-1][1])
+#         print(f"Last response recorded: {state[-1][1]}")
+#     else:
+#         print("No response to record.")
+#     state = state + [(None, f"Disliked Received 🥹")]
+#     return state,dislike_res
 
 
 def get_style():
@@ -1187,6 +1199,20 @@ async def texttospeech(text, language, autoplay):
         print(f"Error in texttospeech: {e}")
         return None
 
+def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
+    print(x.index, x.value, x.liked)
+    if x.liked == True:
+        print("liked")
+        like_res.append(x.value)
+        print(like_res)
+        state = state + [(None, f"Liked Received 👍")]
+    else:
+        dislike_res.append(x.value)
+        state = state + [(None, f"Disliked Received 👎")]
+    return like_res,dislike_res,state
+
+
+
 
 def create_ui():
     title = """<p><h1 align="center">EyeSee Anything in Art</h1></p>
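
Note: print_like_dislike is written for Gradio's built-in like/dislike event. The handler's gr.LikeData argument carries .index (message position), .value (message text), and .liked (True for thumbs-up, False for thumbs-down); extra State components are passed through inputs. A minimal standalone sketch, assuming Gradio 4.x:

    import gradio as gr

    def vote(data: gr.LikeData):
        # data.liked distinguishes thumbs-up from thumbs-down
        print("liked" if data.liked else "disliked", data.index, data.value)

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(value=[["hi", "hello"]])
        chatbot.like(vote, inputs=None, outputs=None)

    demo.launch()
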
@@ -1273,7 +1299,7 @@ def create_ui():
 
     with gr.Column():
         with gr.Column(visible=False) as modules_not_need_gpt:
-            with gr.Tab("Base(GPT Power)"):
+            with gr.Tab("Base(GPT Power)") as base_tab:
                 image_input_base = gr.Image(type="pil", interactive=True, elem_id="image_upload")
                 example_image = gr.Image(type="pil", interactive=False, visible=False)
                 with gr.Row():
@@ -1404,8 +1430,8 @@ def create_ui():
     with gr.Row():
         clear_button_text = gr.Button(value="Clear Text", interactive=True)
         submit_button_text = gr.Button(value="Send", interactive=True, variant="primary")
-        upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
-        downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
+        # upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
+        # downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
 
 
     with gr.Row():
@@ -1676,7 +1702,7 @@ def create_ui():
 
     mv_images = gr.State()
 
-
+    chatbot.like(print_like_dislike, inputs=[like_res,dislike_res,state], outputs=[like_res,dislike_res,chatbot])
 
     submit.click(fn=check_input_image, inputs=[new_crop_save_path], outputs=[processed_image]).success(
         fn=generate_mvs,
@@ -1896,17 +1922,17 @@ def create_ui():
         queue=True
     )
 
-    upvote_btn.click(
-        handle_liked,
-        inputs=[state,like_res],
-        outputs=[chatbot,like_res]
-    )
+    # upvote_btn.click(
+    #     handle_liked,
+    #     inputs=[state,like_res],
+    #     outputs=[chatbot,like_res]
+    # )
 
-    downvote_btn.click(
-        handle_disliked,
-        inputs=[state,dislike_res],
-        outputs=[chatbot,dislike_res]
-    )
+    # downvote_btn.click(
+    #     handle_disliked,
+    #     inputs=[state,dislike_res],
+    #     outputs=[chatbot,dislike_res]
+    # )
 
 
 
@@ -1920,3 +1946,4 @@ if __name__ == '__main__':
     iface.queue(api_open=False, max_size=10)
     # iface.queue(concurrency_count=5, api_open=False, max_size=10)
     iface.launch(server_name="0.0.0.0")
+