achterbrain committed · Commit b18bb39 · Parent(s): 1b60893
updated to v0.0.5
Files changed:
- Dashboard.py +49 -63
- Dashboard_setup.py +16 -1
- {Graphics → assets}/IL_Logo.png +0 -0
- {Data → data}/Prompt_dir_221128.csv +0 -0
- {Data → data}/Prompt_dir_221215.csv +0 -0
- {Data → data}/Prompt_dir_221216.csv +0 -0
- {Data → data}/Prompt_dir_230104.csv +0 -0
- data/Prompt_dir_230131.csv +0 -0
- pages/1_⚙️Manual assessment.py +52 -57
- pages/2_🤖Automated assessment.py +15 -3
- pages/3_📊Assessment summary.py +22 -11
- pages/Functions/Dashboard_functions.py +222 -95
Dashboard.py
CHANGED
@@ -1,10 +1,20 @@
 import streamlit as st
 import pandas as pd
 import numpy as np
-from Dashboard_setup import prompt_dir, automated_task_list
-from pages.Functions.Dashboard_functions import prompt_to_csv
+from Dashboard_setup import prompt_dir, automated_task_list, sidebar_information, compatible_versions, dashboard_version_code
+from pages.Functions.Dashboard_functions import prompt_to_csv, prompt_df_for_download
 
+# Page
+st.title('Generative Image Benchmark')
+st.write('This is an evaluation platform to assess the performance of image generation algorithms developed by Intel Labs. This is the beta version of the platform.')
+st.subheader('User guide')
+st.write('To assess a generative image algorithm, download a set of prompts using the prompt downloader below. Generate one image per prompt and use the file names provided to name your images. Upload these generated images in the data upload section below. The pages for manual assessment and automated assessment allow you to systematically assess the generated images. The results will be presented and ready for download on the assessment summary page.')
+sidebar_information()
+
+
+###### Setup of variables ############################
 ## Add prompt directory to session state
 st.session_state['prompt_dir'] = prompt_dir
 ## Create lists of prompts for manual and automated assessments
@@ -14,48 +24,29 @@ automated_prompts = prompt_dir.loc[
     (prompt_dir['Task']).isin(st.session_state['automated_tasks'])].ID.tolist()
 manual_prompts = prompt_dir.ID.tolist()
 
+# Generate empty dataset for results, if it does not exist yet
+try:
+    num_uploaded_images = st.session_state['eval_df'].shape[0]
+except KeyError:
+    st.session_state['eval_df'] = pd.DataFrame(
+        columns=['File_name','Prompt_no','automated_eval','manual_eval','manual_eval_completed','manual_eval_task_score'])
+    st.session_state['uploaded_img'] = []
+
+# Create dic for automated asssssment if it does not excist yet
+try:
+    test_dict = st.session_state['results_dict']
+except KeyError:
+    st.session_state['results_dict'] = {}
+
+###### Prompt downloader ############################
+## Add prompt downloading routine in expander box
 with st.expander("Prompt downloader"):
     st.write('Select the number of prompts you want to download for each task category. The set of prompts will automatically also include all single objects appearing in the selected prompts.')
-            i_task,
-            value = prompt_task_count[i_task],
-            max_value=prompt_task_count[i_task],
-            min_value=0,
-            step = 1)
-
-    # Create df with selected number of prompts per task
-    for i_task in prompt_task_select.index:
-        temp_df = prompt_dir.loc[prompt_dir['Task']==i_task][0:prompt_task_select[i_task]]
-        if len(temp_df)>0:
-            prompt_download_dict[i_task]=temp_df
-
-    # Concat all tasks to dataframe
-    prompt_download = pd.concat(prompt_download_dict.values())
-    # Exclude prompts from single object prompt download, as else the int transform gives an error
-    single_object_prompt_download = prompt_download.dropna(subset='Linked_prompts')
-
-    # Add relevant single object prompts
-    single_object_ids = single_object_prompt_download.Linked_prompts.str.split(',').explode().unique().astype('int')
-    prompt_download = pd.concat([
-        prompt_download,
-        prompt_dir.loc[prompt_dir['ID'].isin(single_object_ids)]
-        ])
+
+    # Add elements to allow user to select count of prompts per task
+    prompt_download = prompt_df_for_download(prompt_dir)
 
     # For img2img prompt, the prompt in the download gets replaced by img2img instructions
     img2img_instructions_col = prompt_download.loc[prompt_download['Task'].str.startswith('img2img')]['img2img_instructions']
@@ -64,28 +55,15 @@ with st.expander("Prompt downloader"):
     # Add download button for prompts
     st.download_button(
         label="Download prompts",
-        data=prompt_to_csv(prompt_download),
+        data=prompt_to_csv(prompt_download, added_version_code=dashboard_version_code),
        file_name='prompt_list.csv',
        mime='text/csv',
    )
 
-# Generate empty dataset for results, if it does not exist yet
-try:
-    num_uploaded_images = st.session_state['eval_df'].shape[0]
-except KeyError:
-    st.session_state['eval_df'] = pd.DataFrame(
-        columns=['File_name','Prompt_no','automated_eval','manual_eval','manual_eval_completed','manual_eval_task_score'])
-    st.session_state['uploaded_img'] = []
 
-# Create dic for automated asssssment if it does not excist yet
-try:
-    test_dict = st.session_state['results_dict']
-except KeyError:
-    st.session_state['results_dict'] = {}
 
-# Data upload setup
+###### Data uploader and eval_df creation ############################
 st.subheader('Data upload')
 #uploaded_files = st.file_uploader('Upload generated images', accept_multiple_files=True)
 with st.form("my-form", clear_on_submit=True):
@@ -98,8 +76,6 @@ with st.form("my-form", clear_on_submit=True):
     submitted = st.form_submit_button("Add images")
     st.session_state['uploaded_img'] = st.session_state['uploaded_img']+uploaded_files
 
-
-
 # Add new uploaded images to session state
 ## Try to append it to pre-existing list, else create new list in session state
 ## Always reset uploaded files to empty list after they have been added to state
@@ -107,16 +83,24 @@ if len(uploaded_files) != 0:
     try:
         # Extract prompts of uploaded files
         file_names = [x.name for x in uploaded_files]
-        files_prompts = [x.split('_')[0][1:] for x in file_names]
+        files_prompts = [x.split('_',maxsplit=1)[0][1:] for x in file_names]
+        try:
+            files_versions = [x.split('_v',maxsplit=1)[1] for x in file_names]
+            files_compatible = [x.rsplit('.',1)[0] in compatible_versions for x in files_versions]
+        except IndexError:
+            files_compatible = [False]*len(files_prompts)
 
         # Create manual evaluation df
-        df_dict = {'File_name':file_names, 'Prompt_no':files_prompts}
+        df_dict = {'File_name':file_names, 'Prompt_no':files_prompts, 'File_compatible':files_compatible}
         eval_df = pd.DataFrame(df_dict)
         eval_df['automated_eval'] = eval_df['Prompt_no'].astype('int').isin(automated_prompts)
         eval_df['manual_eval'] = eval_df['Prompt_no'].astype('int').isin(manual_prompts)
         eval_df['manual_eval_completed'] = False
         eval_df['manual_eval_task_score'] = np.nan
 
+        # Set manual and automated eval = False if files are not compatible
+        eval_df.loc[eval_df['File_compatible']==False,['automated_eval','manual_eval']]=False
+
         # Exclude given percentage of uploaded images from manual assessment; with random selection
         if man_assessment_share == '50%':
             reassign_number = int(len(eval_df)/2)
@@ -139,6 +123,7 @@ if len(uploaded_files) != 0:
     st.session_state['uploaded_img'] = uploaded_files
 
 
+###### Upload status visualisation ############################
 eval_df = st.session_state['eval_df']
 if eval_df.shape[0]!=0:
     # Print current state of uploaded data
@@ -149,6 +134,7 @@ if eval_df.shape[0]!=0:
 
     if eval_df.shape[0]>sum(eval_df.manual_eval):
         st.write('WARNING: {0} image(s) with invalid file names uploaded. Pictures with invalid names will not be available for assessment. Use the file names provided by the prompt downloader to correctly name your generated images.'.format(str(eval_df.shape[0]-sum(eval_df.manual_eval))))
-
+    if eval_df.shape[0]>sum(eval_df.File_compatible):
+        st.write('WARNING: Some of the images uploaded are not compatible with this version of benchmark software. Please go to https://github.com/8erberg/Intel-Generative-Image-Dashboard-experimental/blob/main/README.md to learn more about hosting the version compatible with your images.')
 else:
     st.write("Upload files to start the assessment.")
Dashboard_setup.py
CHANGED
@@ -1,4 +1,19 @@
+import streamlit as st
 import pandas as pd
 
+# Dashboard version variables
+code_version = 'v0.0.5'
+prompt_dir_version = '230131'
+compatible_versions = ['0.0.5_pd230118', 'None']
+dashboard_version_code = code_version+'_pd'+prompt_dir_version
+
+# List of tasks which are automated in current version - note that each of these needs a corresponding evaluation function in Dashboard_automation_setup.py
 automated_task_list = ['Multiple object types', 'Single object','Negation']
+
+# Import the list of prompts used in current version
+prompt_dir = pd.read_csv('data/Prompt_dir_{0}.csv'.format(prompt_dir_version))
+
+# Create sidebar information
+def sidebar_information():
+    st.sidebar.image('assets/IL_Logo.png')
+    st.sidebar.text(dashboard_version_code)
{Graphics → assets}/IL_Logo.png
RENAMED
File without changes

{Data → data}/Prompt_dir_221128.csv
RENAMED
File without changes

{Data → data}/Prompt_dir_221215.csv
RENAMED
File without changes

{Data → data}/Prompt_dir_221216.csv
RENAMED
File without changes

{Data → data}/Prompt_dir_230104.csv
RENAMED
File without changes

data/Prompt_dir_230131.csv
ADDED
The diff for this file is too large to render. See raw diff.
pages/1_⚙️Manual assessment.py
CHANGED
@@ -2,18 +2,20 @@ import streamlit as st
 import numpy as np
 import pandas as pd
 from PIL import Image
-from pages.Functions.Dashboard_functions import add_previous_manual_assessments, delete_last_manual_rating
-
+from pages.Functions.Dashboard_functions import add_previous_manual_assessments, delete_last_manual_rating, if_true_rerun, radio_rating_index_translation, set_eval_df_rating_vals, collect_linked_prompt_ratings
+from Dashboard_setup import sidebar_information, dashboard_version_code
 
 st.title('Manual assessment')
 st.write('On this page you can rate all uploaded images with regards to how good they match their respective prompts. You can see the outcome of your assessment on the summary page.')
 st.write(' ')
-
-st.sidebar.image(side_image)
+sidebar_information()
 # Create placeholders for key elements
 assessment_header = st.empty()
+include_subprompts_checkbox = st.empty()
 assessment_progress = st.empty()
+assessment_progress_bar = st.empty()
 
+###### Setup of variables ############################
 # Extract how many images are available for manual assessment in entire uploaded dataset
 ## Set to zero if the dataset has not been created yet due to starting the app on an assessment page
 manual_eval_available = 0
@@ -21,6 +23,7 @@ try:
     curr_eval_df = st.session_state['eval_df']
     curr_eval_df['Picture_index']=curr_eval_df.index.values
     curr_manual_eval = curr_eval_df.loc[(curr_eval_df['manual_eval']==True)&(curr_eval_df['manual_eval_completed']==False)]
+    curr_manual_eval_max = len(curr_eval_df.loc[(curr_eval_df['manual_eval']==True)])
    manual_eval_available = len(curr_manual_eval)
    curr_prompt_dir = st.session_state['prompt_dir']
 except KeyError:
@@ -36,18 +39,18 @@ except IndexError:
     pass
 
 
-
+###### Rating loop ############################
 ## If images are available for rating this creates a from to submit ratings to database
 ## If subprompt option is selected, it expands the form to include these as well
 ## If no images are available it prints situation specific instructions
 if manual_eval_available > 0:
     assessment_header.subheader('Assess uploaded images')
     # Let user choose whether subprompts should be presented
-    include_subprompts =
+    include_subprompts = include_subprompts_checkbox.checkbox('Show related subprompts if available (uploaded subprompts may not be shown if images have been assessed already).', value=True)
 
-    # Update the progress statement
+    # Update the progress statement / bar
     assessment_progress.write('{0} images ready / left for assessment.'.format(manual_eval_available))
-
+    assessment_progress_bar.progress(1-manual_eval_available/curr_manual_eval_max)
 
     # Extract first example for manual assessment which is not rated yet (first meaning the lowest index, for lowest prompt number)
     ## Also extract relevant metadata of this example
@@ -83,41 +86,25 @@ if manual_eval_available > 0:
             curr_prompt_dir.loc[curr_prompt_dir['ID']==int(curr_manual_eval_row.Prompt_no.item())]['Prompt'].item()
         ))
         # Exclude prompt from rating if user chooses to
-
-
+        exclude_prompt = st.checkbox('Exclude this prompt from manual assessment', value=False)
+        include_prompt = not exclude_prompt
+
         # Show image of current prompt and rating
         st.image(st.session_state['uploaded_img'][curr_manual_eval_row.Picture_index.item()],width=350)
+
+        # Preselected radio option
+        radio_preselect = radio_rating_index_translation(curr_manual_eval_row.manual_eval_task_score.item())
+
+        # Create rating element for main prompt
         curr_manual_eval_row['manual_eval_task_score'] = st.radio(
-            "Does the image match the prompt?",('Yes', 'No'), horizontal=True, key='base')
+            "Does the image match the prompt?",('Yes', 'No'), horizontal=True, key='base', index=radio_preselect)
 
         st.write(' ') # Create whitespace
         st.write(' ') # Create whitespace
 
-        if type(curr_linked_prompts)==list:
-            curr_linked_rows = curr_eval_df.loc[
-                (curr_eval_df['manual_eval_completed']==False)&
-                (curr_eval_df['Prompt_no'].isin(curr_linked_prompts))]
-            curr_linked_rows = curr_linked_rows.groupby('Prompt_no').first()
-        else:
-            curr_linked_rows = pd.DataFrame()
-
-        # Create rating for subprompts if a df for subprompt info was created
-        for row in curr_linked_rows.itertuples():
-            # Prompt
-            st.write('Prompt: {0}'.format(
-                curr_prompt_dir.loc[curr_prompt_dir['ID']==int(row.Index)]['Prompt'].item()
-            ))
-            # Image
-            st.image(st.session_state['uploaded_img'][row.Picture_index],width=350)
-            # Rating
-            curr_linked_rows.loc[curr_linked_rows['Picture_index']==row.Picture_index,'manual_eval_task_score'] = st.radio(
-                "Does the image match the prompt?",('Yes', 'No'), horizontal=True, key=row.Picture_index)
-            st.write(' ')
-            st.write(' ')
-
+        # Create elements to collect ratings on linked prompts
+        # This only happens if the current prompt has linked prompts and the user choose to show linked prompts
+        curr_linked_rows = collect_linked_prompt_ratings(curr_linked_prompts, curr_eval_df, curr_prompt_dir)
 
         # Submit assessments to database
         submitted = st.form_submit_button("Submit")
@@ -126,12 +113,13 @@ if manual_eval_available > 0:
             temp_picture_index_list = []
 
             # First add main prompt assessment
-            st.session_state['eval_df']
+            st.session_state['eval_df'] = set_eval_df_rating_vals(
+                st.session_state['eval_df'],
+                picture_index=curr_picture_index,
+                manual_eval=include_prompt,
+                manual_eval_completed=True,
+                manual_eval_task_score=curr_manual_eval_row['manual_eval_task_score'].item()
+            )
 
             # Add picture index to temp list
             temp_picture_index_list.append(curr_picture_index)
@@ -139,27 +127,32 @@ if manual_eval_available > 0:
             # Add subprompt assessment if dataset was created for subprompts
             # This stage will automatically be skipped if the df for linked prompts is empty
             for row in curr_linked_rows.itertuples():
-                st.session_state['eval_df']
+                st.session_state['eval_df'] = set_eval_df_rating_vals(
+                    st.session_state['eval_df'],
+                    picture_index=row.Picture_index,
+                    manual_eval=include_prompt,
+                    manual_eval_completed=True,
+                    manual_eval_task_score=row.manual_eval_task_score
+                )
+
                 # Add picture index to temp list
                 temp_picture_index_list.append(row.Picture_index)
 
-            # Add temp list of picture indices to rating history
-
+            # Add temp list of picture indices to rating history, if prompt is not excluded
+            if include_prompt:
+                st.session_state['manual_rating_history'].append(temp_picture_index_list)
 
             # Reset page after ratings were submitted
             st.experimental_rerun()
 
-    #
-    delete_last_manual_rating(
+    # Allow user to return to last manual rating
+    st.session_state['manual_rating_history'],st.session_state['eval_df'], bool_rating_deleted = delete_last_manual_rating(
+        st.session_state['manual_rating_history'],st.session_state['eval_df'])
+    if_true_rerun(bool_rating_deleted)
 
-    #
-    add_previous_manual_assessments()
+    # Allow user to upload past ratings and add them to eval_df
+    st.session_state['eval_df'], bool_ratings_uploaded = add_previous_manual_assessments(st.session_state['eval_df'],dashboard_version_code=dashboard_version_code)
+    if_true_rerun(bool_ratings_uploaded)
 
 # If no files are uploaded
 elif len(st.session_state['uploaded_img'])==0:
@@ -167,6 +160,8 @@ elif len(st.session_state['uploaded_img'])==0:
 # If files are uploaded but all ratings are completed
 else:
     assessment_progress.write('You finished assessing the current batch of uploaded images. Upload more pictures of generate your results on the summary page.')
-    # Add option to return to last manual rating
-    delete_last_manual_rating()
 
+    # Allow user to return to last manual rating
+    st.session_state['manual_rating_history'],st.session_state['eval_df'], bool_rating_deleted = delete_last_manual_rating(
+        st.session_state['manual_rating_history'],st.session_state['eval_df'])
+    if_true_rerun(bool_rating_deleted)
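A note on the pattern introduced here: the rating page no longer mutates eval_df inline; it calls helpers that take the dataframe as an argument and return the updated frame, and the page writes the result back to st.session_state itself. The sketch below exercises that call pattern outside Streamlit with toy data; the helper body is a copy of set_eval_df_rating_vals as it appears in the Dashboard_functions.py diff further down, and the example rows and indices are hypothetical.

# Minimal sketch (toy data, not part of the commit) of the helper-based update pattern.
import pandas as pd

def set_eval_df_rating_vals(eval_df, picture_index, manual_eval, manual_eval_completed, manual_eval_task_score):
    # Same logic as the helper added in pages/Functions/Dashboard_functions.py
    temp_eval_df = eval_df
    temp_eval_df.loc[picture_index, 'manual_eval'] = manual_eval
    temp_eval_df.loc[picture_index, 'manual_eval_completed'] = manual_eval_completed
    temp_eval_df.loc[picture_index, 'manual_eval_task_score'] = manual_eval_task_score
    return temp_eval_df

eval_df = pd.DataFrame({
    'File_name': ['p12_1_v0.0.5_pd230131.png'],
    'Prompt_no': ['12'],
    'manual_eval': [True],
    'manual_eval_completed': [False],
    'manual_eval_task_score': [None],
})

# What the page does on submit, with session_state replaced by a local variable:
eval_df = set_eval_df_rating_vals(eval_df, picture_index=0, manual_eval=True,
                                  manual_eval_completed=True, manual_eval_task_score='Yes')
rating_history = []
rating_history.append([0])   # indices rated together, later consumed by delete_last_manual_rating
print(eval_df.loc[0, 'manual_eval_completed'])   # True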
pages/2_🤖Automated assessment.py
CHANGED
@@ -2,13 +2,15 @@ import streamlit as st
 import numpy as np
 from itertools import compress
 from PIL import Image
+from Dashboard_setup import sidebar_information
+sidebar_information() # Move this up to be displayed before the evaluation functions are loaded
 from Dashboard_automation_setup import fun_dict
 
 st.title('Automated Assessment')
 st.write('On this page you can use automated assessment algorithms to assess how good uploaded images match their respective prompts.')
 st.write(' ')
-st.sidebar.image('Graphics/IL_Logo.png')
 
+###### Setup of variables ############################
 try:
     # Create necessary variables
     prompt_dir = st.session_state['prompt_dir']
@@ -29,6 +31,7 @@ except KeyError:
     automated_eval_available = 0
 
 
+###### Rating loop ############################
 # If images for assessment available: create form to start assessment
 # Else: Note to upload images for assessment
 if automated_eval_available > 0:
@@ -56,14 +59,23 @@ if automated_eval_available > 0:
         # Create list for tasks which were selected for assessment
         selected_tasks = list(compress(task_list,task_list_selected))
 
-
         # Create dataset to loop over with assessment
         assessed_df = curr_eval_df.loc[
             (curr_eval_df['automated_eval']==True)&
             (curr_eval_df['Task'].isin(selected_tasks))]
         results_column = []
 
+        # Add counter for progress bars
+        num_automated_rows = len(assessed_df)
+        i_num_row = 0
+        i_progress_increase = 1/num_automated_rows
+        st.write('Progress of automatic evaluation:')
+        auto_assessment_progress = st.progress(0)
+
         for row in assessed_df.itertuples():
+            i_num_row +=1
+            auto_assessment_progress.progress(0+i_num_row*i_progress_increase)
+
             # Apply task based classifier and safe in list
             temp_image = Image.open(st.session_state['uploaded_img'][row.Picture_index])
             temp_result = fun_dict[row.Task](
@@ -72,6 +84,6 @@ if automated_eval_available > 0:
 
         assessed_df['Score']=results_column
         st.session_state['auto_eval_df']=assessed_df[['File_name','Prompt_no','Picture_index','Task','Score']]
-        st.write('
+        st.write('Assessment completed. You can access the results on the summary page. Running a new automated assessment will override past results.')
 else:
     st.write('Upload files on dashboard starting page to start automated assessment.')
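The progress bar added above advances by 1/num_rows per assessed image and ends at 1.0. A toy sketch of that arithmetic, detached from Streamlit (the row count is a hypothetical value, not from the commit):

# Toy sketch of the progress increments used in the loop above.
num_automated_rows = 4
i_progress_increase = 1 / num_automated_rows
for i_num_row in range(1, num_automated_rows + 1):
    fraction_done = 0 + i_num_row * i_progress_increase
    print(f'row {i_num_row}: progress {fraction_done:.2f}')   # 0.25, 0.50, 0.75, 1.00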
pages/3_📊Assessment summary.py
CHANGED
@@ -4,22 +4,28 @@ import seaborn as sns
 import matplotlib.pyplot as plt
 from PIL import Image
 from pages.Functions.Dashboard_functions import pre_assessment_visualisation, multi_comparison_plotI, print_results_tabs
-
-
+from Dashboard_setup import sidebar_information, dashboard_version_code
+sidebar_information()
 
 
-
-def convert_df_to_csv(df):
-
-    return df[['File_name','Prompt_no','Task','Score']].to_csv().encode('utf-8')
+#@st.cache
+#def convert_df_to_csv(df):
+#    # IMPORTANT: Cache the conversion to prevent computation on every rerun
+#    return df[['File_name','Prompt_no','Task','Score']].to_csv().encode('utf-8')
 
-assessment_result_frames = {}
 
+def df_to_csv_download(df, added_version_code='vNone'):
+    # IMPORTANT: Cache the conversion to prevent computation on every rerun
+    df['Dashboard_version']= added_version_code
+    return df[['File_name','Prompt_no','Task','Score','Dashboard_version']].to_csv().encode('utf-8')
 
+assessment_result_frames = {}
 st.title('Assessment Summary')
-st.header('Manual assessment')
 
 
+
+###### Manual assessment visualisation ############################
+st.header('Manual assessment')
 try:
     if sum(st.session_state['eval_df']['manual_eval_completed'])>0:
         # Display file uploader
@@ -29,7 +35,7 @@ try:
         manual_eval_df['Score'] = manual_eval_df['manual_eval_task_score'].map({'Yes':True, 'No':False})
         manual_results_df = manual_eval_df.loc[
             (manual_eval_df['manual_eval']==True)&
-            (manual_eval_df['
+            ~(manual_eval_df['manual_eval_task_score'].isna())]
         manual_results_df['Model']='Manual assessment'
         assessment_result_frames['Manual assessment'] = manual_results_df
 
@@ -38,7 +44,7 @@ try:
 
         st.download_button(
             label="Download manual assessment data",
-            data=
+            data=df_to_csv_download(manual_results_df, added_version_code=dashboard_version_code),
            file_name='manual_assessment.csv',
            mime='text/csv',
        )
@@ -47,6 +53,8 @@ try:
 except KeyError:
     pre_assessment_visualisation(type_str='manual')
 
+
+###### Automated assessment visualisation ############################
 st.write(' ')
 st.header('Automated assessment')
 try:
@@ -63,7 +71,7 @@ try:
 
     st.download_button(
         label="Download automated assessment data",
-        data=
+        data=df_to_csv_download(auto_eval_df, added_version_code=dashboard_version_code),
        file_name='automated_assessment.csv',
        mime='text/csv',
    )
@@ -71,6 +79,8 @@ except KeyError:
     pre_assessment_visualisation(type_str='automated')
 
 
+
+###### Gallery ############################
 try:
     # Start gallery
     st.header('Assessment gallery')
@@ -105,6 +115,7 @@ try:
             curr_Prompt = curr_prompt_dir[curr_prompt_dir['ID']==int(curr_Prompt_no)].Prompt
             curr_Picture_index = gallery_row_print.Picture_index.item()
             # Plot prompt and image
+            st.write('File name: '+gallery_row_print.File_name)
             st.write('Prompt: '+curr_Prompt.item())
             st.image(st.session_state['uploaded_img'][curr_Picture_index],width=350)
 
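The summary page now stamps every result download with a Dashboard_version column, and the manual-assessment page (via add_previous_manual_assessments_upload, shown in the next file) rejects re-imports whose stamp does not match the running dashboard. A small sketch of that round trip with toy data follows; the helper body mirrors df_to_csv_download from the diff above, and the result row and version string are hypothetical.

# Sketch of the download / re-upload round trip (toy data, not part of the commit).
import io
import pandas as pd

def df_to_csv_download(df, added_version_code='vNone'):
    # Mirrors the helper added to the summary page: stamp the version, keep the result columns.
    df['Dashboard_version'] = added_version_code
    return df[['File_name', 'Prompt_no', 'Task', 'Score', 'Dashboard_version']].to_csv().encode('utf-8')

results = pd.DataFrame({
    'File_name': ['p12_1_v0.0.5_pd230131.png'],
    'Prompt_no': [12],
    'Task': ['Single object'],
    'Score': [True],
})
csv_bytes = df_to_csv_download(results, added_version_code='v0.0.5_pd230131')

# On re-import, the manual-assessment page checks the stamp against the running dashboard:
uploaded = pd.read_csv(io.BytesIO(csv_bytes))
assert 'Dashboard_version' in uploaded.columns
assert all(uploaded['Dashboard_version'] == 'v0.0.5_pd230131')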
pages/Functions/Dashboard_functions.py
CHANGED
@@ -1,4 +1,8 @@
 # General functions and routines used in the dashboard
+'''
+- Functions below are ordered by page on which they are used
+- If possible, functions should not manipulate the session_state within them
+'''
 
 import streamlit as st
 import pandas as pd
@@ -9,6 +13,13 @@ from PIL import Image
 
 ##### Page-unspecific functions
 
+def if_true_rerun(bool_input):
+    '''
+    This function triggers a rerun of the page if the input == True
+    '''
+    if bool_input == True:
+        st.experimental_rerun()
+
 def assert_uploaded_frame(uploaded_df):
     # Set up variables checked for
     asserted_columns = {
@@ -19,7 +30,8 @@ def assert_uploaded_frame(uploaded_df):
     asserted_column_names = ['Prompt_no','Score','Task','File_name']
 
     # Check whether all needed column names are present
-
+    df_columns_list = uploaded_df.columns.tolist()
+    existing_column_names = [(x in df_columns_list) for x in asserted_column_names]
     assert all(existing_column_names), "The uploaded dataframe is missing a column needed for import. Your table needs to contain the columns: 'Prompt_no', 'Score', 'Task', 'File_name' "
 
     # Check whether all needed columns have correct dtypes
@@ -35,74 +47,249 @@ def assert_multi_frame_upload(list_of_uploaded_dfs):
         assert_uploaded_frame(i_df)
 
 ##### Dashboard main page
-def prompt_to_csv(df):
+def prompt_to_csv(df, added_version_code='vNone'):
     df_download = df
-    df_download['Filename']='p'+df_download['ID'].astype('str')+'
+    df_download['Filename']='p'+df_download['ID'].astype('str')+'_1_'+added_version_code+'.png'
     df_download = df[['Prompt','Filename']].drop_duplicates(subset='Filename')
     return df_download.to_csv().encode('utf-8')
 
+def prompt_df_for_download(prompt_dir):
+    '''
+    Function to create a subset of the prompt_dir via count based selection
+    '''
+    # Create local copy of variables
+    temp_prompt_dir = prompt_dir
+
+    # Create dict to hold counts of downloaded prompts
+    prompt_download_dict = {}
+    ## Count how many prompts are in database to allow for max value in selection
+    prompt_task_count = temp_prompt_dir.Task.value_counts(sort=False)
+    prompt_task_select = prompt_task_count.copy()
+
+    # Create numerical selector for every task in prompt directory, add count per task to dict
+    for i_task in prompt_task_select.index:
+        prompt_task_select[i_task] = st.number_input(
+            i_task,
+            value = prompt_task_count[i_task],
+            max_value=prompt_task_count[i_task],
+            min_value=0,
+            step = 1)
+
+    # Create df with selected number of prompts per task
+    for i_task in prompt_task_select.index:
+        temp_df = temp_prompt_dir.loc[temp_prompt_dir['Task']==i_task][0:prompt_task_select[i_task]]
+        if len(temp_df)>0:
+            prompt_download_dict[i_task]=temp_df
+
+    # Concat all tasks to dataframe
+    prompt_download = pd.concat(prompt_download_dict.values())
+
+    # Add linked prompts, if the user chooses to
+    download_linked_prompts = st.checkbox('Download linked prompts', value=True)
+    if download_linked_prompts:
+
+        # Delete rows which do not have linked prompts to avoid type error
+        linked_prompts_info = prompt_download.dropna(subset='Linked_prompts')
+
+        # Add relevant linked prompts
+        linked_prompts_ids = linked_prompts_info.Linked_prompts.str.split(',').explode().unique().astype('int')
+        prompt_download = pd.concat(
+            [prompt_download,
+            temp_prompt_dir.loc[temp_prompt_dir['ID'].isin(linked_prompts_ids)]])
+
+    # Drop rows prompts which appear twice
+    prompt_download = prompt_download.drop_duplicates(subset='ID')
+
+    return prompt_download
+
 ##### Manual assessment
 
+def set_eval_df_rating_vals(eval_df, picture_index, manual_eval, manual_eval_completed, manual_eval_task_score):
+    '''
+    Function to set a block of key manual rating related variables of eval_df
+    '''
+    temp_eval_df = eval_df
+    temp_eval_df.loc[picture_index,'manual_eval']=manual_eval
+    temp_eval_df.loc[picture_index,'manual_eval_completed']=manual_eval_completed
+    temp_eval_df.loc[picture_index,'manual_eval_task_score']=manual_eval_task_score
+    return temp_eval_df
+
+def radio_rating_index_translation(manual_rating_value):
+    if manual_rating_value == "No":
+        return 1
+    else:
+        return 0
+
+
+def collect_linked_prompt_ratings(curr_linked_prompts, curr_eval_df, curr_prompt_dir):
+    '''
+    Create elements to collect ratings on linked prompts:
+    If there are linked prompts, create df with info
+    Else create emtpy df which will automatically skip the rating creation for these prompts
+    Here we do not test for (curr_eval_df['manual_eval']==True) as the curr_linked_prompts
+    is already testing for valid prompt number and we want to ignore the exclusion for subprompts
+    '''
+    if type(curr_linked_prompts)==list:
+        curr_linked_rows = curr_eval_df.loc[
+            (curr_eval_df['manual_eval_completed']==False)&
+            (curr_eval_df['Prompt_no'].isin(curr_linked_prompts))]
+        curr_linked_rows = curr_linked_rows.groupby('Prompt_no').first()
+    else:
+        curr_linked_rows = pd.DataFrame()
+
+    # Create rating for subprompts if a df for subprompt info was created
+    for row in curr_linked_rows.itertuples():
+        # Preselected radio option
+        radio_preselect = radio_rating_index_translation(row.manual_eval_task_score)
+        # Prompt
+        st.write('Prompt: {0}'.format(
+            curr_prompt_dir.loc[curr_prompt_dir['ID']==int(row.Index)]['Prompt'].item()
+        ))
+        # Image
+        st.image(st.session_state['uploaded_img'][row.Picture_index],width=350)
+        # Rating
+        curr_linked_rows.loc[curr_linked_rows['Picture_index']==row.Picture_index,'manual_eval_task_score'] = st.radio(
+            "Does the image match the prompt?",('Yes', 'No'), horizontal=True, key=row.Picture_index, index=radio_preselect)
+        st.write(' ')
+        st.write(' ')
+
+    return curr_linked_rows
+
+
+def delete_last_manual_rating(session_history, eval_df):
     '''
     Routine to delete last manual rating and hence to return to it
     '''
+    # Create local copies of objects
+    temp_session_history = session_history
+    temp_eval_df = eval_df.copy()
+    temp_submit = False
 
+    if len(temp_session_history)>0:
        if st.button('Return to last rated image'):
            # The list contains sublists of images rated together, here we loop over these images to reset all of them
+            deleted_picture_index_list = temp_session_history.pop()
            for i_picind in deleted_picture_index_list:
+                temp_eval_df.loc[
                    i_picind,'manual_eval_completed']=False
+                #temp_eval_df.loc[
+                #    i_picind,'manual_eval_task_score']=np.nan
+
+            # Set submit boolean to true, to rerun the page
+            temp_submit = True
+
+    return temp_session_history, temp_eval_df, temp_submit
+
 
+def add_previous_manual_assessments_upload_back(eval_df):
     '''
+    Routine to upload a dataframe of previous (manual) assessment to add it to existing database.
+    The uploaded df is assessed, matching counts are printed and it returns the imported df for furthe processing.
     '''
+    # Create necessary local variables
+    temp_eval_df = eval_df
 
+    # Upload single dataframe, setting default to None for code type checking
+    temp_uploaded_ratings = None
+    temp_uploaded_ratings = st.file_uploader('Select .csv for upload', accept_multiple_files=False)
+    if temp_uploaded_ratings != None:
        try:
+            # Import the uploaded csv as dataframe
+            uploaded_ratings_df = pd.read_csv(temp_uploaded_ratings)
 
            # Run standard assert pipeline
            assert_uploaded_frame(uploaded_ratings_df)
 
            # Show matching image count and instructions
+            overlapping_files_df = pd.merge(temp_eval_df,uploaded_ratings_df,on='File_name',how='inner')
            st.write('Number of matching file names found: '+ str(len(overlapping_files_df)))
            st.write('Click "Add results" button to add / override current ratings with uploaded ratings.')
+
+            return uploaded_ratings_df
        except UnicodeDecodeError:
            st.write('WARNING: The uploaded file has to be a .csv downloaded from the "Assessment summary" page.')
+    return temp_uploaded_ratings
 
 
+def add_previous_manual_assessments_upload(eval_df, dashboard_version_code='vNone'):
+    '''
+    Routine to upload a dataframe of previous (manual) assessment to add it to existing database.
+    The uploaded df is assessed, matching counts are printed and it returns the imported df for furthe processing.
+    '''
+    # Create necessary local variables
+    temp_eval_df = eval_df
+
+    # Upload single dataframe, setting default to None for code type checking
+    temp_uploaded_ratings = None
+    temp_uploaded_ratings = st.file_uploader('Select .csv for upload', accept_multiple_files=False)
+    if temp_uploaded_ratings != None:
        try:
+            # Import the uploaded csv as dataframe
+            uploaded_ratings_df = pd.read_csv(temp_uploaded_ratings)
+
+            # Run standard assert pipeline
+            assert_uploaded_frame(uploaded_ratings_df)
+
+            # Check the uploaded df has a registered dashboard version
+            assert 'Dashboard_version' in uploaded_ratings_df.columns,"The uploaded dataframe needs to have a Dashboard_version column."
+            # Check for correct dashboard version in uploaded file
+            matching_dashboard_version = uploaded_ratings_df['Dashboard_version'] == dashboard_version_code
+            assert all(matching_dashboard_version),"The dashboard version of your uploaded results does not match the version of this dashboard."
+
+            # Show matching image count and instructions
+            overlapping_files_df = pd.merge(temp_eval_df,uploaded_ratings_df,on='File_name',how='inner')
+            st.write('Number of matching file names found: '+ str(len(overlapping_files_df)))
+            ## Show warning if some of the matching images already have a rating
+            if len(overlapping_files_df.manual_eval_task_score.dropna())>0:
+                st.write('WARNING: {0} of {1} matching files already have a saved rating. These will be overriden when you click "Add results".'.format(
+                    str(len(overlapping_files_df.manual_eval_task_score.dropna())),str(len(overlapping_files_df))))
+            st.write('Click "Add results" button to add uploaded ratings to current ratings.')
+            return uploaded_ratings_df
+        except UnicodeDecodeError:
+            st.write('WARNING: The uploaded file has to be a .csv downloaded from the "Assessment summary" page.')
+    return temp_uploaded_ratings
+
+def add_previous_manual_assessments_submit(eval_df, uploaded_ratings):
+    '''
+    If uploaded_ratings != None, this will create a button which when pressed will trigger
+    for the provided ratings to be added to eval_df
+    '''
+    # Create necessary local variables
+    temp_eval_df = eval_df
+    temp_submitted = False
+
+    # Create dict to translate uploaded score into str format used during manual assessment
+    bool_str_dict = {True:'Yes',False:'No'}
 
+    # If a dataframe of uploaded ratings was provided: create a button which allows to add ratings to existing eval_df
+    if type(uploaded_ratings) == pd.DataFrame:
+        temp_submitted = st.button("Add results")
+        if temp_submitted:
+            for row in uploaded_ratings.itertuples():
+                temp_eval_df.loc[temp_eval_df['File_name']==row.File_name,'manual_eval']=True
+                temp_eval_df.loc[temp_eval_df['File_name']==row.File_name,'manual_eval_completed']=True
+                temp_eval_df.loc[temp_eval_df['File_name']==row.File_name,'manual_eval_task_score']=bool_str_dict[row.Score]
+    return temp_eval_df, temp_submitted
 
 
+def add_previous_manual_assessments(eval_df, dashboard_version_code):
+    '''
+    Full routine to allow the user to upload past ratings and add these to eval_df
+    '''
+    st.subheader('Add previous assessments')
+    st.write('Upload results of previous assessment (as downloaded from summary page) to add these results and skip these images in your current manual assessment. Note that you can only add results for images which you have uploaded using the same file name.')
+
+    # Create necessary local variables
+    temp_eval_df = eval_df
+
+    # Allow user to upload .csv with prior ratings
+    uploaded_ratings = add_previous_manual_assessments_upload(temp_eval_df, dashboard_version_code)
+
+    # Add rating to eval_df, if some were uploaded
+    temp_eval_df, temp_submitted = add_previous_manual_assessments_submit(temp_eval_df, uploaded_ratings)
+
+    return temp_eval_df, temp_submitted
+
 ##### Assessment summary
 
 def print_results_tabs(file_upload, results_df):
@@ -167,63 +354,3 @@ def multi_comparison_plotI(results_df = None, uploaded_df_list = []):
     plt.xlabel(' ')
     plt.ylim(0, 100)
     return fig,grouped_series
-
-
-
-
-############## Functions no longer used, to be deleted
-
-def plot_style_simple(results_df, return_table = False):
-    '''
-    Simple plot function for plotting just one dataframe of results
-    '''
-    eval_sum = results_df.groupby('Task')['Score'].sum()
-    eval_count = results_df.groupby('Task')['Score'].count()
-    eval_share = (eval_sum/eval_count)*100
-
-    if return_table:
-        return_series = results_df.groupby('Task')['Score'].sum()/results_df.groupby('Task')['Score'].count()*100
-        return_series = return_series.rename('Percentage correct')
-        return return_series
-
-    # Add small amount to make the bars on plot not disappear
-    eval_share = eval_share+1
-
-    fig = plt.figure(figsize=(12, 3))
-    sns.barplot(x=eval_share.index, y=eval_share.values, palette='GnBu')
-    plt.xticks(rotation=-65)
-    plt.ylabel('Percentage correct')
-    plt.xlabel(' ')
-    return fig
-
-def plot_style_combined(results_df, uploaded_df = None, return_table=False):
-    '''
-    Plot function which can plot to dataframe for comparison
-    '''
-    # Create joined dataframe of results and uploadd_df
-    uploaded_results_df = uploaded_df
-    manual_results_df['Model']='Current'
-    uploaded_results_df['Model']='Uploaded'
-    results_df = pd.concat([manual_results_df,uploaded_results_df])
-
-    # Create scores for plot
-    eval_sum = results_df.groupby(['Model','Task'])['Score'].sum()
-    eval_count = results_df.groupby(['Model','Task'])['Score'].count()
-    eval_share = (eval_sum/eval_count)*100
-    eval_share = eval_share.reset_index()
-
-    if return_table:
-        return_series = results_df.groupby(['Task','Model'])['Score'].sum()/results_df.groupby(['Task','Model'])['Score'].count()*100
-        return_series = return_series.rename('Percentage correct')
-        return return_series
-
-    # Add small amount to make the bars on plot not disappear
-    eval_share['Score'] = eval_share['Score']+1
-
-    # Create plot
-    fig = plt.figure(figsize=(12, 3))
-    sns.barplot(data=eval_share,x='Task',y='Score',hue='Model', palette='GnBu')
-    plt.xticks(rotation=-65)
-    plt.ylabel('Percentage correct')
-    plt.xlabel(' ')
-    return fig
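Because the new module docstring asks that helpers avoid manipulating st.session_state, several of them can be exercised without a running Streamlit app. The toy check below copies radio_rating_index_translation and prompt_to_csv from the diff above; the prompt rows and version string are hypothetical, and this snippet is illustration only, not part of the commit.

# Toy checks of the session_state-free helpers (hypothetical data).
import pandas as pd

def radio_rating_index_translation(manual_rating_value):
    # Copy of the helper above: preselect 'No' (index 1) only for an explicit "No" rating.
    if manual_rating_value == "No":
        return 1
    else:
        return 0

def prompt_to_csv(df, added_version_code='vNone'):
    # Copy of the helper above.
    df_download = df
    df_download['Filename'] = 'p' + df_download['ID'].astype('str') + '_1_' + added_version_code + '.png'
    df_download = df[['Prompt', 'Filename']].drop_duplicates(subset='Filename')
    return df_download.to_csv().encode('utf-8')

assert radio_rating_index_translation('No') == 1
assert radio_rating_index_translation(float('nan')) == 0   # unrated rows preselect 'Yes'

prompts = pd.DataFrame({'ID': [1, 2], 'Prompt': ['a cat', 'a dog'], 'Task': ['Single object'] * 2})
csv_bytes = prompt_to_csv(prompts, added_version_code='v0.0.5_pd230131')
assert b'p1_1_v0.0.5_pd230131.png' in csv_bytes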