Niki Zhang committed
Commit 8068524 · verified · 1 Parent(s): 01a8cab

Update app.py

Files changed (1):
  app.py +63 -36
app.py CHANGED
@@ -25,7 +25,10 @@ from segment_anything import sam_model_registry
 import easyocr
 import re
 import edge_tts
+from langchain import __version__
 
+# Print the current version of LangChain
+print(f"Current LangChain version: {__version__}")
 # import tts
 
 ###############################################################################
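The version print is presumably diagnostic: the memory-handling changes below go through LangChain's `save_context` API, so logging the installed LangChain version makes behavioral differences easier to trace.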
@@ -648,7 +651,9 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
     Human_prompt = f'\nHuman: The description of the image with path {new_image_path} is: {img_caption}. This information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
     AI_prompt = "Received."
     visual_chatgpt.global_prompt = Human_prompt + 'AI: ' + AI_prompt
-    visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
+    visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
+    print("memory",visual_chatgpt.agent.memory)
+    # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
     parsed_data = get_image_gpt(openai_api_key, new_image_path,"Please provide the name, artist, year of creation, and material used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\",\"artist\": \"Name of the artist\", \"year\": \"Year of creation\", \"material\": \"Material used in the painting\" }.")
     parsed_data = json.loads(parsed_data.replace("'", "\""))
     name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["material"]
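Replacing the raw concatenation onto `agent.memory.buffer` with `save_context` routes the exchange through LangChain's memory interface, which stores it as a structured human/AI message pair instead of mutating the rendered string. A minimal sketch of the difference, assuming a plain `ConversationBufferMemory` (the agent in this app may wrap a different memory class):

# Minimal sketch, assuming langchain.memory.ConversationBufferMemory;
# the app's visual_chatgpt.agent may use a different memory class.
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()

# Structured write: records the pair as messages, so prompt templates
# and buffer rendering stay consistent with the rest of the agent.
memory.save_context({"input": "Describe the image at /tmp/img.png."},
                    {"output": "Received."})

# The old approach appended raw text to the rendered buffer string,
# which a message-based memory can rebuild and silently discard:
# memory.buffer = memory.buffer + "...raw text..."

print(memory.load_memory_variables({}))  # {'history': 'Human: ...\nAI: Received.'}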
@@ -756,7 +761,8 @@ async def submit_caption(state, text_refiner, length, sentiment, factuality, lan
     # input_points=input_points, input_labels=input_labels)
 
 
-    if not args.disable_gpt and text_refiner:
+    # if not args.disable_gpt and text_refiner:
+    if not args.disable_gpt:
         print("new crop save",new_crop_save_path)
         focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
         if focus_info.startswith('"') and focus_info.endswith('"'):
@@ -961,7 +967,8 @@ async def inference_traject(origin_image,sketcher_image, enable_wiki, language,
     sketcher_image['image']=image_input
 
 
-    if not args.disable_gpt and text_refiner:
+    # if not args.disable_gpt and text_refiner:
+    if not args.disable_gpt:
         focus_info=get_image_gpt(openai_api_key,crop_save_path,prompt)
         if focus_info.startswith('"') and focus_info.endswith('"'):
             focus_info=focus_info[1:-1]
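In both `submit_caption` and `inference_traject`, the branch is now gated on `args.disable_gpt` alone, so `get_image_gpt` runs even when no `text_refiner` is configured; the old condition is kept as a comment for reference.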
@@ -1006,7 +1013,9 @@ def clear_chat_memory(visual_chatgpt, keep_global=False):
     visual_chatgpt.memory.clear()
     visual_chatgpt.point_prompt = ""
     if keep_global:
-        visual_chatgpt.agent.memory.buffer = visual_chatgpt.global_prompt
+        # visual_chatgpt.agent.memory.buffer = visual_chatgpt.global_prompt
+        visual_chatgpt.agent.memory.save_context({"input": visual_chatgpt.global_prompt}, {"output": None})
+        print("test")
     else:
         visual_chatgpt.current_image = None
         visual_chatgpt.global_prompt = ""
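One caveat with the `keep_global` branch: LangChain's buffer memories generally require string message content, so `{"output": None}` may raise when the AI message is constructed or the history is rendered. A defensive variant (my assumption, not part of this commit) passes an empty string instead:

# Hedged sketch: use "" rather than None for the AI side of the pair,
# since LangChain message classes generally expect string content.
visual_chatgpt.agent.memory.save_context(
    {"input": visual_chatgpt.global_prompt},
    {"output": ""},  # assumption: "" stands in for "no reply"
)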
@@ -1054,7 +1063,9 @@ async def cap_everything(paragraph, visual_chatgpt,language,autoplay):
     Human_prompt = f'\nThe description of the image with path {visual_chatgpt.current_image} is:\n{paragraph}\nThis information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
     AI_prompt = "Received."
     visual_chatgpt.global_prompt = Human_prompt + 'AI: ' + AI_prompt
-    visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
+
+    # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
+    visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
     # waveform_visual, audio_output=tts.predict(paragraph, input_language, input_audio, input_mic, use_mic, agree)
     audio_output=await texttospeech(paragraph,language,autoplay)
     return paragraph,audio_output
@@ -1075,26 +1086,27 @@ def cap_everything_withoutsound(image_input, visual_chatgpt, text_refiner,paragr
     Human_prompt = f'\nThe description of the image with path {visual_chatgpt.current_image} is:\n{paragraph}\nThis information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
     AI_prompt = "Received."
     visual_chatgpt.global_prompt = Human_prompt + 'AI: ' + AI_prompt
-    visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
+    visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
+    # visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
     return paragraph
 
-def handle_liked(state,like_res):
-    if state:
-        like_res.append(state[-1][1])
-        print(f"Last response recorded: {state[-1][1]}")
-    else:
-        print("No response to record.")
-    state = state + [(None, f"Liked Received 👍")]
-    return state,like_res
+# def handle_liked(state,like_res):
+#     if state:
+#         like_res.append(state[-1][1])
+#         print(f"Last response recorded: {state[-1][1]}")
+#     else:
+#         print("No response to record.")
+#     state = state + [(None, f"Liked Received 👍")]
+#     return state,like_res
 
-def handle_disliked(state,dislike_res):
-    if state:
-        dislike_res.append(state[-1][1])
-        print(f"Last response recorded: {state[-1][1]}")
-    else:
-        print("No response to record.")
-    state = state + [(None, f"Disliked Received 🥹")]
-    return state,dislike_res
+# def handle_disliked(state,dislike_res):
+#     if state:
+#         dislike_res.append(state[-1][1])
+#         print(f"Last response recorded: {state[-1][1]}")
+#     else:
+#         print("No response to record.")
+#     state = state + [(None, f"Disliked Received 🥹")]
+#     return state,dislike_res
 
 
 def get_style():
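The deleted `handle_liked`/`handle_disliked` handlers (kept above as comments) are superseded by Gradio's built-in like/dislike event: the new `print_like_dislike` function below is attached to `chatbot.like` further down, replacing the separate upvote/downvote buttons.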
@@ -1187,6 +1199,20 @@ async def texttospeech(text, language, autoplay):
         print(f"Error in texttospeech: {e}")
         return None
 
+def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
+    print(x.index, x.value, x.liked)
+    if x.liked == True:
+        print("liked")
+        like_res.append(x.value)
+        print(like_res)
+        state = state + [(None, f"Liked Received 👍")]
+    else:
+        dislike_res.append(x.value)
+        state = state + [(None, f"Disliked Received 👎")]
+    return like_res,dislike_res,state
+
+
+
 
 def create_ui():
     title = """<p><h1 align="center">EyeSee Anything in Art</h1></p>
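Gradio delivers the event payload to a `.like` handler through the `gr.LikeData` type annotation rather than through `inputs`, which is why `x` does not appear in the `inputs` list; `x.index`, `x.value`, and `x.liked` identify the message and the vote direction. A self-contained sketch of the same pattern (component names here are illustrative, not taken from app.py):

# Self-contained sketch of the .like wiring used in this commit;
# all names below are illustrative.
import gradio as gr

def record_vote(x: gr.LikeData, likes, dislikes, history):
    # x is injected by Gradio via the LikeData annotation.
    if x.liked:
        likes.append(x.value)
        history = history + [(None, "Liked Received 👍")]
    else:
        dislikes.append(x.value)
        history = history + [(None, "Disliked Received 👎")]
    return likes, dislikes, history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=[("hi", "hello")])
    likes = gr.State([])
    dislikes = gr.State([])
    # Attaching a like listener makes the thumb icons appear on messages.
    chatbot.like(record_vote,
                 inputs=[likes, dislikes, chatbot],
                 outputs=[likes, dislikes, chatbot])

demo.launch()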
@@ -1273,7 +1299,7 @@ def create_ui():
 
         with gr.Column():
             with gr.Column(visible=False) as modules_not_need_gpt:
-                with gr.Tab("Base(GPT Power)",visible=False) as base_tab:
+                with gr.Tab("Base(GPT Power)") as base_tab:
                     image_input_base = gr.Image(type="pil", interactive=True, elem_id="image_upload")
                     example_image = gr.Image(type="pil", interactive=False, visible=False)
                     with gr.Row():
@@ -1404,8 +1430,8 @@ def create_ui():
                     with gr.Row():
                         clear_button_text = gr.Button(value="Clear Text", interactive=True)
                         submit_button_text = gr.Button(value="Send", interactive=True, variant="primary")
-                        upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
-                        downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
+                        # upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
+                        # downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
 
 
                     with gr.Row():
@@ -1676,7 +1702,7 @@ def create_ui():
 
     mv_images = gr.State()
 
-    # chatbot.like(handle_like_dislike, inputs=[like_state, dislike_state], outputs=[like_state, dislike_state])
+    chatbot.like(print_like_dislike, inputs=[like_res,dislike_res,state], outputs=[like_res,dislike_res,chatbot])
 
     submit.click(fn=check_input_image, inputs=[new_crop_save_path], outputs=[processed_image]).success(
         fn=generate_mvs,
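Note the wiring asymmetry: `print_like_dislike` reads `state` as its third input and returns the extended history as its third output, but that output is bound to `chatbot`, not `state`. The acknowledgment line therefore shows up in the rendered chat, while the `state` component itself appears to stay unchanged unless another event updates it.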
@@ -1896,17 +1922,17 @@ def create_ui():
         queue=True
     )
 
-    upvote_btn.click(
-        handle_liked,
-        inputs=[state,like_res],
-        outputs=[chatbot,like_res]
-    )
+    # upvote_btn.click(
+    #     handle_liked,
+    #     inputs=[state,like_res],
+    #     outputs=[chatbot,like_res]
+    # )
 
-    downvote_btn.click(
-        handle_disliked,
-        inputs=[state,dislike_res],
-        outputs=[chatbot,dislike_res]
-    )
+    # downvote_btn.click(
+    #     handle_disliked,
+    #     inputs=[state,dislike_res],
+    #     outputs=[chatbot,dislike_res]
+    # )
 
 
 
@@ -1920,3 +1946,4 @@ if __name__ == '__main__':
     iface.queue(api_open=False, max_size=10)
     # iface.queue(concurrency_count=5, api_open=False, max_size=10)
     iface.launch(server_name="0.0.0.0")
+