phyloforfun committed on
Commit
c824976
·
1 Parent(s): 1d9ab62

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files
Files changed (3) hide show
  1. app.py +24 -5
  2. run_VoucherVision.py +35 -16
  3. vouchervision/general_utils.py +1 -0
app.py CHANGED
@@ -308,7 +308,6 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
308
 
309
  @st.cache_data
310
  def handle_image_upload_and_gallery():
311
- st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
312
 
313
  if st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
314
  if MAX_GALLERY_IMAGES <= st.session_state['processing_add_on']:
@@ -381,6 +380,7 @@ def content_input_images(col_left, col_right):
381
  handle_image_upload_and_gallery_hf(uploaded_files)
382
 
383
  else:
 
384
  handle_image_upload_and_gallery()
385
 
386
  def list_jpg_files(directory_path):
@@ -468,12 +468,19 @@ def use_test_image():
468
  clear_image_uploads()
469
  st.session_state['uploader_idk'] += 1
470
  for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
471
- file_path = save_uploaded_file(os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
 
 
 
 
472
  st.session_state['input_list'].append(file_path)
473
 
474
  img = Image.open(file_path)
475
  img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
476
- file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file, img)
 
 
 
477
  st.session_state['input_list_small'].append(file_path_small)
478
 
479
 
@@ -1667,7 +1674,20 @@ def content_prompt_and_llm_version():
1667
  with col_llm_1:
1668
  GUI_MODEL_LIST = ModelMaps.get_models_gui_list()
1669
  st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(st.session_state.config['leafmachine'].get('LLM_version', ModelMaps.MODELS_GUI_DEFAULT)))
1670
-
 
 
 
 
 
 
 
 
 
 
 
 
 
1671
 
1672
 
1673
  def content_api_check():
@@ -2186,7 +2206,6 @@ def content_less_used():
2186
  #################################################################################################################################################
2187
  # Sidebar #######################################################################################################################################
2188
  #################################################################################################################################################
2189
- @st.cache_data
2190
  def sidebar_content():
2191
  if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
2192
  validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
 
308
 
309
  @st.cache_data
310
  def handle_image_upload_and_gallery():
 
311
 
312
  if st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
313
  if MAX_GALLERY_IMAGES <= st.session_state['processing_add_on']:
 
380
  handle_image_upload_and_gallery_hf(uploaded_files)
381
 
382
  else:
383
+ st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
384
  handle_image_upload_and_gallery()
385
 
386
  def list_jpg_files(directory_path):
 
468
  clear_image_uploads()
469
  st.session_state['uploader_idk'] += 1
470
  for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
471
+ try:
472
+ file_path = save_uploaded_file(os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
473
+ except:
474
+ file_path = save_uploaded_file_local(os.path.join(st.session_state.dir_home,'demo','demo_images'),os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
475
+
476
  st.session_state['input_list'].append(file_path)
477
 
478
  img = Image.open(file_path)
479
  img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
480
+ try:
481
+ file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file, img)
482
+ except:
483
+ file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file, img)
484
  st.session_state['input_list_small'].append(file_path_small)
485
 
486
 
 
1674
  with col_llm_1:
1675
  GUI_MODEL_LIST = ModelMaps.get_models_gui_list()
1676
  st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(st.session_state.config['leafmachine'].get('LLM_version', ModelMaps.MODELS_GUI_DEFAULT)))
1677
+ st.markdown("""
1678
+ Based on preliminary results, the following models perform the best. We are currently running tests of all possible OCR + LLM + Prompt combinations to create recipes for different workflows.
1679
+ - `Mistral Medium`
1680
+ - `Mistral Small`
1681
+ - `Mistral Tiny`
1682
+ - `PaLM 2 text-bison@001`
1683
+ - `GPT 4 Turbo 1106-preview`
1684
+ - `GPT 3.5 Instruct`
1685
+ - `LOCAL Mixtral 7Bx8 Instruct`
1686
+ - `LOCAL Mixtral 7B Instruct`
1687
+
1688
+ Larger models (e.g., `GPT 4`, `GPT 4 32k`, `Gemini Pro`) do not necessarily perform better for these tasks. MistralAI models exceeded our expectations and perform extremely well. PaLM 2 text-bison@001 also seems to consistently out-perform Gemini Pro.
1689
+
1690
+ The `SLTPvA_short.yaml` prompt also seems to work better with smaller LLMs (e.g., Mistral Tiny). Alternatively, enable double OCR to help the LLM focus on the OCR text given a longer prompt.""")
1691
 
1692
 
1693
  def content_api_check():
 
2206
  #################################################################################################################################################
2207
  # Sidebar #######################################################################################################################################
2208
  #################################################################################################################################################
 
2209
  def sidebar_content():
2210
  if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
2211
  validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
run_VoucherVision.py CHANGED
@@ -1,10 +1,26 @@
1
  import streamlit.web.cli as stcli
2
- import os, sys
3
 
4
  # pip install protobuf==3.20.0
5
  # pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 nope
6
  # pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  def resolve_path(path):
@@ -15,18 +31,21 @@ def resolve_path(path):
15
  if __name__ == "__main__":
16
  dir_home = os.path.dirname(__file__)
17
 
18
- # pip install protobuf==3.20.0
19
-
20
- sys.argv = [
21
- "streamlit",
22
- "run",
23
- resolve_path(os.path.join(dir_home,"app.py")),
24
- # resolve_path(os.path.join(dir_home,"vouchervision", "VoucherVision_GUI.py")),
25
- "--global.developmentMode=false",
26
- # "--server.port=8545",
27
- "--server.port=8546",
28
- # Toggle below for HF vs Local
29
- # "--is_hf=1",
30
- # "--is_hf=0",
31
- ]
32
- sys.exit(stcli.main())
 
 
 
 
1
  import streamlit.web.cli as stcli
2
+ import os, sys, socket
3
 
4
  # pip install protobuf==3.20.0
5
  # pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 nope
6
  # pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
7
+ # pip install protobuf==3.20.0
8
 
9
def find_available_port(start_port, max_attempts=1000):
    """Return the first TCP port at or above ``start_port`` that can be bound.

    Candidate ports are probed in increasing order by binding a throwaway
    socket to 127.0.0.1; the socket is closed again before returning.

    Args:
        start_port: First port number to try.
        max_attempts: Maximum number of consecutive ports to probe.

    Returns:
        The first port number for which the bind succeeded.

    Raises:
        ValueError: If no port could be bound within ``max_attempts`` tries,
            or the candidate range runs past the highest valid port (65535).
    """
    highest_port = 65535
    # bind() raises OverflowError (not OSError) for ports above 65535, which
    # would escape the retry loop, so never probe beyond the valid range.
    last_port = min(start_port + max_attempts - 1, highest_port)
    for port in range(start_port, last_port + 1):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
            except OSError:
                # Port is in use (or otherwise unbindable); try the next one.
                continue
            return port
    raise ValueError(f"Could not find an available port within {max_attempts} attempts starting from port {start_port}.")
24
 
25
 
26
  def resolve_path(path):
 
31
if __name__ == "__main__":
    # Launch the Streamlit app that lives next to this script.
    dir_home = os.path.dirname(__file__)

    # First port to probe; the search walks upward from here.
    start_port = 8529
    try:
        free_port = find_available_port(start_port)
    except ValueError as e:
        # No bindable port was found in the probed range; report and stop.
        print(e)
    else:
        sys.argv = [
            "streamlit",
            "run",
            resolve_path(os.path.join(dir_home, "app.py")),
            # resolve_path(os.path.join(dir_home,"vouchervision", "VoucherVision_GUI.py")),
            "--global.developmentMode=false",
            # Use the port that was just found to be free instead of a
            # hard-coded one, so a second instance does not collide.
            f"--server.port={free_port}",
            # Toggle below for HF vs Local
            # "--is_hf=1",
            # "--is_hf=0",
        ]
        sys.exit(stcli.main())
vouchervision/general_utils.py CHANGED
@@ -106,6 +106,7 @@ def save_token_info_as_csv(Dirs, LLM_version0, path_api_cost, total_tokens_in, t
106
  else:
107
  return None #TODO add config tests to expense_report
108
 
 
109
  def summarize_expense_report(path_expense_report):
110
  # Initialize counters and sums
111
  run_count = 0
 
106
  else:
107
  return None #TODO add config tests to expense_report
108
 
109
+ @st.cache_data
110
  def summarize_expense_report(path_expense_report):
111
  # Initialize counters and sums
112
  run_count = 0