Spaces:
Running
Running
phyloforfun
committed on
Commit
·
c824976
1
Parent(s):
1d9ab62
Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
Browse files- app.py +24 -5
- run_VoucherVision.py +35 -16
- vouchervision/general_utils.py +1 -0
app.py
CHANGED
@@ -308,7 +308,6 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
|
|
308 |
|
309 |
@st.cache_data
|
310 |
def handle_image_upload_and_gallery():
|
311 |
-
st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
|
312 |
|
313 |
if st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
|
314 |
if MAX_GALLERY_IMAGES <= st.session_state['processing_add_on']:
|
@@ -381,6 +380,7 @@ def content_input_images(col_left, col_right):
|
|
381 |
handle_image_upload_and_gallery_hf(uploaded_files)
|
382 |
|
383 |
else:
|
|
|
384 |
handle_image_upload_and_gallery()
|
385 |
|
386 |
def list_jpg_files(directory_path):
|
@@ -468,12 +468,19 @@ def use_test_image():
|
|
468 |
clear_image_uploads()
|
469 |
st.session_state['uploader_idk'] += 1
|
470 |
for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
|
471 |
-
|
|
|
|
|
|
|
|
|
472 |
st.session_state['input_list'].append(file_path)
|
473 |
|
474 |
img = Image.open(file_path)
|
475 |
img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
476 |
-
|
|
|
|
|
|
|
477 |
st.session_state['input_list_small'].append(file_path_small)
|
478 |
|
479 |
|
@@ -1667,7 +1674,20 @@ def content_prompt_and_llm_version():
|
|
1667 |
with col_llm_1:
|
1668 |
GUI_MODEL_LIST = ModelMaps.get_models_gui_list()
|
1669 |
st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(st.session_state.config['leafmachine'].get('LLM_version', ModelMaps.MODELS_GUI_DEFAULT)))
|
1670 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1671 |
|
1672 |
|
1673 |
def content_api_check():
|
@@ -2186,7 +2206,6 @@ def content_less_used():
|
|
2186 |
#################################################################################################################################################
|
2187 |
# Sidebar #######################################################################################################################################
|
2188 |
#################################################################################################################################################
|
2189 |
-
@st.cache_data
|
2190 |
def sidebar_content():
|
2191 |
if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
|
2192 |
validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
|
|
|
308 |
|
309 |
@st.cache_data
|
310 |
def handle_image_upload_and_gallery():
|
|
|
311 |
|
312 |
if st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
|
313 |
if MAX_GALLERY_IMAGES <= st.session_state['processing_add_on']:
|
|
|
380 |
handle_image_upload_and_gallery_hf(uploaded_files)
|
381 |
|
382 |
else:
|
383 |
+
st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
|
384 |
handle_image_upload_and_gallery()
|
385 |
|
386 |
def list_jpg_files(directory_path):
|
|
|
468 |
clear_image_uploads()
|
469 |
st.session_state['uploader_idk'] += 1
|
470 |
for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
|
471 |
+
try:
|
472 |
+
file_path = save_uploaded_file(os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
|
473 |
+
except:
|
474 |
+
file_path = save_uploaded_file_local(os.path.join(st.session_state.dir_home,'demo','demo_images'),os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
|
475 |
+
|
476 |
st.session_state['input_list'].append(file_path)
|
477 |
|
478 |
img = Image.open(file_path)
|
479 |
img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
480 |
+
try:
|
481 |
+
file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file, img)
|
482 |
+
except:
|
483 |
+
file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file, img)
|
484 |
st.session_state['input_list_small'].append(file_path_small)
|
485 |
|
486 |
|
|
|
1674 |
with col_llm_1:
|
1675 |
GUI_MODEL_LIST = ModelMaps.get_models_gui_list()
|
1676 |
st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(st.session_state.config['leafmachine'].get('LLM_version', ModelMaps.MODELS_GUI_DEFAULT)))
|
1677 |
+
st.markdown("""
|
1678 |
+
Based on preliminary results, the following models perform the best. We are currently running tests of all possible OCR + LLM + Prompt combinations to create recipes for different workflows.
|
1679 |
+
- `Mistral Medium`
|
1680 |
+
- `Mistral Small`
|
1681 |
+
- `Mistral Tiny`
|
1682 |
+
- `PaLM 2 text-bison@001`
|
1683 |
+
- `GPT 4 Turbo 1106-preview`
|
1684 |
+
- `GPT 3.5 Instruct`
|
1685 |
+
- `LOCAL Mixtral 7Bx8 Instruct`
|
1686 |
+
- `LOCAL Mixtral 7B Instruct`
|
1687 |
+
|
1688 |
+
Larger models (e.g., `GPT 4`, `GPT 4 32k`, `Gemini Pro`) do not necessarily perform better for these tasks. MistralAI models exceeded our expectations and perform extremely well. PaLM 2 text-bison@001 also seems to consistently out-perform Gemini Pro.
|
1689 |
+
|
1690 |
+
The `SLTPvA_short.yaml` prompt also seems to work better with smaller LLMs (e.g., Mistral Tiny). Alternatively, enable double OCR to help the LLM focus on the OCR text given a longer prompt.""")
|
1691 |
|
1692 |
|
1693 |
def content_api_check():
|
|
|
2206 |
#################################################################################################################################################
|
2207 |
# Sidebar #######################################################################################################################################
|
2208 |
#################################################################################################################################################
|
|
|
2209 |
def sidebar_content():
|
2210 |
if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
|
2211 |
validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
|
run_VoucherVision.py
CHANGED
@@ -1,10 +1,26 @@
|
|
1 |
import streamlit.web.cli as stcli
|
2 |
-
import os, sys
|
3 |
|
4 |
# pip install protobuf==3.20.0
|
5 |
# pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 nope
|
6 |
# pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
|
|
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
|
10 |
def resolve_path(path):
|
@@ -15,18 +31,21 @@ def resolve_path(path):
|
|
15 |
if __name__ == "__main__":
|
16 |
dir_home = os.path.dirname(__file__)
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
1 |
import streamlit.web.cli as stcli
|
2 |
+
import os, sys, socket
|
3 |
|
4 |
# pip install protobuf==3.20.0
|
5 |
# pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 nope
|
6 |
# pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
|
7 |
+
# pip install protobuf==3.20.0
|
8 |
|
9 |
+
def find_available_port(start_port, max_attempts=1000):
    """Return the first local TCP port at or above ``start_port`` that can be bound.

    Candidate ports are probed one at a time by binding a throwaway socket
    to ``127.0.0.1``; the socket is closed again before returning, so the
    caller is responsible for actually claiming the port afterwards.

    Args:
        start_port: First port number to try.
        max_attempts: Maximum number of consecutive ports to probe.

    Returns:
        The first port number for which the bind succeeded.

    Raises:
        ValueError: If no port could be bound within ``max_attempts``
            candidates, including when the candidates fall outside the
            valid port range.
    """
    highest_port = 65535  # largest valid TCP port number

    for port in range(start_port, start_port + max_attempts):
        if port > highest_port:
            # Every further candidate is out of range as well; stop here and
            # report the failure through the documented ValueError.
            break
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
            except (OSError, OverflowError):
                # OSError: the port is in use (or otherwise not bindable).
                # OverflowError: the port number is outside 0-65535.
                continue
            # If successful, return the current port
            return port

    raise ValueError(f"Could not find an available port within {max_attempts} attempts starting from port {start_port}.")
|
24 |
|
25 |
|
26 |
def resolve_path(path):
|
|
|
31 |
if __name__ == "__main__":
    # Launch the Streamlit app (app.py, located next to this script) through
    # Streamlit's CLI entry point by substituting the process arguments.
    dir_home = os.path.dirname(__file__)

    start_port = 8529
    try:
        # Probe upward from start_port and actually serve on the port that
        # was found, so a port already in use does not block startup.
        free_port = find_available_port(start_port)
        sys.argv = [
            "streamlit",
            "run",
            resolve_path(os.path.join(dir_home, "app.py")),
            # resolve_path(os.path.join(dir_home,"vouchervision", "VoucherVision_GUI.py")),
            "--global.developmentMode=false",
            f"--server.port={free_port}",
            # Toggle below for HF vs Local
            # "--is_hf=1",
            # "--is_hf=0",
        ]
        sys.exit(stcli.main())
    except ValueError as e:
        # Raised by find_available_port when no port could be bound.
        print(e)
|
vouchervision/general_utils.py
CHANGED
@@ -106,6 +106,7 @@ def save_token_info_as_csv(Dirs, LLM_version0, path_api_cost, total_tokens_in, t
|
|
106 |
else:
|
107 |
return None #TODO add config tests to expense_report
|
108 |
|
|
|
109 |
def summarize_expense_report(path_expense_report):
|
110 |
# Initialize counters and sums
|
111 |
run_count = 0
|
|
|
106 |
else:
|
107 |
return None #TODO add config tests to expense_report
|
108 |
|
109 |
+
@st.cache_data
|
110 |
def summarize_expense_report(path_expense_report):
|
111 |
# Initialize counters and sums
|
112 |
run_count = 0
|