Niki Zhang committed: Update app.py

app.py CHANGED
@@ -25,7 +25,10 @@ from segment_anything import sam_model_registry
 import easyocr
 import re
 import edge_tts
+from langchain import __version__
 
+# Print the current version of LangChain
+print(f"Current LangChain version: {__version__}")
 # import tts
 
 ###############################################################################
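
Note: the added lines log the installed LangChain version at startup. A minimal equivalent that reads the package metadata instead of importing the package (a sketch, not part of the commit):

    from importlib.metadata import version

    # Query the installed distribution's metadata directly
    print(f"Current LangChain version: {version('langchain')}")
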
@@ -648,7 +651,9 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
     Human_prompt = f'\nHuman: The description of the image with path {new_image_path} is: {img_caption}. This information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
     AI_prompt = "Received."
     visual_chatgpt.global_prompt = Human_prompt + 'AI: ' + AI_prompt
-    visual_chatgpt.agent.memory.
+    visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
+    print("memory",visual_chatgpt.agent.memory)
+    # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
     parsed_data = get_image_gpt(openai_api_key, new_image_path,"Please provide the name, artist, year of creation, and material used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\",\"artist\": \"Name of the artist\", \"year\": \"Year of creation\", \"material\": \"Material used in the painting\" }.")
     parsed_data = json.loads(parsed_data.replace("'", "\""))
     name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["material"]
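
Note: this hunk completes the previously truncated visual_chatgpt.agent.memory. statement. In classic LangChain, conversation memory records one exchange per save_context call. A self-contained sketch of that behavior, assuming ConversationBufferMemory (the concrete memory class behind agent.memory is not shown in the diff):

    from langchain.memory import ConversationBufferMemory

    memory = ConversationBufferMemory()
    # Each call appends one Human/AI turn to the transcript
    memory.save_context({"input": "Describe the image."}, {"output": "Received."})
    # The rendered buffer is what the agent later sees as chat history
    print(memory.load_memory_variables({}))
    # -> {'history': 'Human: Describe the image.\nAI: Received.'}
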
@@ -756,7 +761,8 @@ async def submit_caption(state, text_refiner, length, sentiment, factuality, lan
     # input_points=input_points, input_labels=input_labels)
 
 
-    if not args.disable_gpt and text_refiner:
+    # if not args.disable_gpt and text_refiner:
+    if not args.disable_gpt:
         print("new crop save",new_crop_save_path)
         focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
         if focus_info.startswith('"') and focus_info.endswith('"'):
@@ -961,7 +967,8 @@ async def inference_traject(origin_image,sketcher_image, enable_wiki, language,
     sketcher_image['image']=image_input
 
 
-    if not args.disable_gpt and text_refiner:
+    # if not args.disable_gpt and text_refiner:
+    if not args.disable_gpt:
         focus_info=get_image_gpt(openai_api_key,crop_save_path,prompt)
         if focus_info.startswith('"') and focus_info.endswith('"'):
             focus_info=focus_info[1:-1]
@@ -1006,7 +1013,9 @@ def clear_chat_memory(visual_chatgpt, keep_global=False):
     visual_chatgpt.memory.clear()
     visual_chatgpt.point_prompt = ""
     if keep_global:
-        visual_chatgpt.agent.memory.buffer = visual_chatgpt.global_prompt
+        # visual_chatgpt.agent.memory.buffer = visual_chatgpt.global_prompt
+        visual_chatgpt.agent.memory.save_context({"input": visual_chatgpt.global_prompt}, {"output": None})
+        print("test")
     else:
         visual_chatgpt.current_image = None
         visual_chatgpt.global_prompt = ""
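
Note: here save_context is passed {"output": None}. LangChain stores the output as an AI message and later joins message contents into the buffer string, so a None output can fail validation or break rendering. A hedged alternative (an assumption, not the author's fix) records an empty reply instead:

    # Sketch: use an empty string so the AI turn stays a valid str
    visual_chatgpt.agent.memory.save_context(
        {"input": visual_chatgpt.global_prompt},
        {"output": ""},
    )
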
@@ -1054,7 +1063,9 @@ async def cap_everything(paragraph, visual_chatgpt,language,autoplay):
     Human_prompt = f'\nThe description of the image with path {visual_chatgpt.current_image} is:\n{paragraph}\nThis information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
     AI_prompt = "Received."
     visual_chatgpt.global_prompt = Human_prompt + 'AI: ' + AI_prompt
-
+
+    # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
+    visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
     # waveform_visual, audio_output=tts.predict(paragraph, input_language, input_audio, input_mic, use_mic, agree)
     audio_output=await texttospeech(paragraph,language,autoplay)
     return paragraph,audio_output
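
Note: texttospeech (defined elsewhere in app.py) builds on the edge_tts package imported at the top of the file. The core synthesis call looks like this sketch; the voice name is an assumption:

    import asyncio
    import edge_tts

    async def synthesize(text: str, path: str = "output.mp3") -> str:
        # Communicate streams synthesized speech; save() writes it to disk
        await edge_tts.Communicate(text, "en-US-JennyNeural").save(path)
        return path

    asyncio.run(synthesize("Received."))
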
@@ -1075,26 +1086,27 @@ def cap_everything_withoutsound(image_input, visual_chatgpt, text_refiner,paragr
     Human_prompt = f'\nThe description of the image with path {visual_chatgpt.current_image} is:\n{paragraph}\nThis information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
     AI_prompt = "Received."
     visual_chatgpt.global_prompt = Human_prompt + 'AI: ' + AI_prompt
-    visual_chatgpt.agent.memory.
+    visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
+    # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
     return paragraph
 
-def handle_liked(state,like_res):
-    if state:
-        like_res.append(state[-1][1])
-        print(f"Last response recorded: {state[-1][1]}")
-    else:
-        print("No response to record.")
-    state = state + [(None, f"Liked Received 👍")]
-    return state,like_res
+# def handle_liked(state,like_res):
+#     if state:
+#         like_res.append(state[-1][1])
+#         print(f"Last response recorded: {state[-1][1]}")
+#     else:
+#         print("No response to record.")
+#     state = state + [(None, f"Liked Received 👍")]
+#     return state,like_res
 
-def handle_disliked(state,dislike_res):
-    if state:
-        dislike_res.append(state[-1][1])
-        print(f"Last response recorded: {state[-1][1]}")
-    else:
-        print("No response to record.")
-    state = state + [(None, f"Disliked Received 🥹")]
-    return state,dislike_res
+# def handle_disliked(state,dislike_res):
+#     if state:
+#         dislike_res.append(state[-1][1])
+#         print(f"Last response recorded: {state[-1][1]}")
+#     else:
+#         print("No response to record.")
+#     state = state + [(None, f"Disliked Received 🥹")]
+#     return state,dislike_res
 
 
 def get_style():
@@ -1187,6 +1199,20 @@ async def texttospeech(text, language, autoplay):
         print(f"Error in texttospeech: {e}")
         return None
 
+def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
+    print(x.index, x.value, x.liked)
+    if x.liked == True:
+        print("liked")
+        like_res.append(x.value)
+        print(like_res)
+        state = state + [(None, f"Liked Received 👍")]
+    else:
+        dislike_res.append(x.value)
+        state = state + [(None, f"Disliked Received 👎")]
+    return like_res,dislike_res,state
+
+
+
 
 def create_ui():
     title = """<p><h1 align="center">EyeSee Anything in Art</h1></p>
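
Note: print_like_dislike is written for Gradio's built-in like/dislike event. The handler's gr.LikeData argument carries .index (message position), .value (message text), and .liked (True for thumbs-up, False for thumbs-down); extra State components are passed through inputs. A minimal standalone sketch, assuming Gradio 4.x:

    import gradio as gr

    def vote(data: gr.LikeData):
        # data.liked distinguishes thumbs-up from thumbs-down
        print("liked" if data.liked else "disliked", data.index, data.value)

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(value=[["hi", "hello"]])
        chatbot.like(vote, inputs=None, outputs=None)

    demo.launch()
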
@@ -1273,7 +1299,7 @@ def create_ui():
 
     with gr.Column():
         with gr.Column(visible=False) as modules_not_need_gpt:
-            with gr.Tab("Base(GPT Power)"):
+            with gr.Tab("Base(GPT Power)") as base_tab:
                 image_input_base = gr.Image(type="pil", interactive=True, elem_id="image_upload")
                 example_image = gr.Image(type="pil", interactive=False, visible=False)
                 with gr.Row():
@@ -1404,8 +1430,8 @@ def create_ui():
     with gr.Row():
         clear_button_text = gr.Button(value="Clear Text", interactive=True)
         submit_button_text = gr.Button(value="Send", interactive=True, variant="primary")
-        upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
-        downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
+        # upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
+        # downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
 
 
     with gr.Row():
@@ -1676,7 +1702,7 @@ def create_ui():
 
     mv_images = gr.State()
 
-
+    chatbot.like(print_like_dislike, inputs=[like_res,dislike_res,state], outputs=[like_res,dislike_res,chatbot])
 
     submit.click(fn=check_input_image, inputs=[new_crop_save_path], outputs=[processed_image]).success(
         fn=generate_mvs,
@@ -1896,17 +1922,17 @@ def create_ui():
         queue=True
     )
 
-    upvote_btn.click(
-        handle_liked,
-        inputs=[state,like_res],
-        outputs=[chatbot,like_res]
-    )
+    # upvote_btn.click(
+    #     handle_liked,
+    #     inputs=[state,like_res],
+    #     outputs=[chatbot,like_res]
+    # )
 
-    downvote_btn.click(
-        handle_disliked,
-        inputs=[state,dislike_res],
-        outputs=[chatbot,dislike_res]
-    )
+    # downvote_btn.click(
+    #     handle_disliked,
+    #     inputs=[state,dislike_res],
+    #     outputs=[chatbot,dislike_res]
+    # )
 
 
 
@@ -1920,3 +1946,4 @@ if __name__ == '__main__':
     iface.queue(api_open=False, max_size=10)
     # iface.queue(concurrency_count=5, api_open=False, max_size=10)
     iface.launch(server_name="0.0.0.0")
+