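# NER Visualizer -- a Streamlit app for exploring spaCy NER models on text
# (e.g., radiology notes): pick a model and an example (or upload a file),
# view highlighted predictions, optionally flag negated entities, compare
# predictions against gold labels, and run/export inference over a whole
# dataset. Run locally with: streamlit run app.py
# (file name assumed; use whatever this script is saved as).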
import ast  # needed for the 'entities' converter used on the eval CSV below
import os

import pandas as pd
import streamlit as st
import yaml
from annotated_text import annotated_text

# Project helpers; the wildcard imports are relied on below for load_models,
# process_files, process_text, process_text_compare, negation, infer_negation
from src.negation import *
from src.app_utils import *
from src.inference import inference
from src.trainers import eval_spacy
#### Loading configuration and models ####
with open('./st_config.yaml', "r") as yamlfile:
    args = yaml.load(yamlfile, Loader=yaml.FullLoader)

if args['model_dir'] is not None and os.path.exists(args['model_dir']):
    model_names_dir = os.listdir(args['model_dir'])
else:
    model_names_dir = []

model_names = model_names_dir + args['default_models'] if args['default_models'] is not None else model_names_dir
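# st_config.yaml is expected to define the keys used throughout this app:
# model_dir, default_models, examples, and colors_palette. A minimal sketch
# (values are illustrative assumptions, not taken from the repo):
#   model_dir: ./models
#   default_models: [en_core_web_sm]
#   examples:
#     demo: "No evidence of pneumothorax."
#   colors_palette: ["#8ef", "#faa", "#afa", "#fea"]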
st.title('NER Visualizer')

##################################
#### Sidebar (Choose Model) ######
##################################
model_name = st.sidebar.selectbox("Select a model", options=model_names)
print(model_name)
if len(model_names) > 0:
    models = load_models(model_names, args, model_names_dir)
    print(models)
    selected_model = models[model_name]
    print(selected_model)
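# load_models (from src.app_utils) is assumed to return a dict mapping each
# model name to a loaded spaCy pipeline; selected_model is used as a spaCy
# Language object below (callable on text, .get_pipe('ner'), .pipe(...)).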
##################################
#### Sidebar (Choose Example) ####
##################################
st.sidebar.markdown('###')
if args['examples'] is not None:
    chosen_note = st.sidebar.selectbox("Select an example text", options=args['examples'].keys())
else:
    chosen_note = None

if chosen_note == "radiology_eval_dataset":
    text_input = pd.read_csv("./eval_35.csv", converters={'entities': ast.literal_eval})
    text_input = text_input.to_dict('records')
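    # text_input is now a list of records shaped like the JSON example shown
    # in the sidebar below, e.g.
    # {'text': '...', 'entities': [[start, end, label], ...]}.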
# Assign a colour from the configured palette to each entity label
if len(model_names) > 0:
    ents_available = selected_model.get_pipe('ner').labels
    print(ents_available)
    ent_colors_map = dict(zip(ents_available, args['colors_palette'][:len(ents_available)]))
##################
### Text area ###
##################
if chosen_note != "radiology_eval_dataset":
    text_input = st.text_area("Type notes in the box below",
                              value=args['examples'][chosen_note] if args['examples'] is not None else '')
st.markdown("---")
############################
### Sidebar (Load Files) ###
############################
st.sidebar.info('For csv & json files, name the text column to be inferred "text" '
                'and the annotated-labels column "entities". Format json text as below.')
st.sidebar.json([{"text": "example", "entities": [[5, 6, "do"], [8, 11, "dx"]]},
                 {"text": "example2", "entities": [[5, 6, "do"], [8, 11, "dx"]]}],
                expanded=False)
uploaded_file = st.sidebar.file_uploader("Upload a file", type=["csv", "json", "pdf", "txt"])
text_input = process_files(uploaded_file, text_input)
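# process_files (from src.app_utils) is assumed to parse an uploaded
# csv/json/pdf/txt into the same shapes used above (a plain string or a
# list of {'text': ..., 'entities': ...} records) and to fall back to the
# existing text_input when nothing is uploaded.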
##################################
### Sidebar (Select Entities) ###
##################################
selected_entities = st.sidebar.multiselect(
    "Select the entities you want to view",
    options=ents_available if len(model_names) > 0 else [],
    default=ents_available if len(model_names) > 0 else [],
)
##########################
### Text Area (Slider) ###
##########################
if isinstance(text_input, (list, dict)) and len(text_input) > 1:
    sample = st.slider('Select Example', min_value=1, max_value=len(text_input))
else:
    sample = None
# Process documents to tokens
if len(model_names) > 0:
    if sample is not None:
        infer_input = text_input[sample - 1]["text"]
    elif isinstance(text_input, list):
        # single-record dataset: no slider was shown, so take the only record
        infer_input = text_input[0]["text"]
    else:
        infer_input = text_input
    doc = selected_model(infer_input)

textcol_negate, textcol_compare = st.columns([1, 1])
# Checkbox for negation detection
negate = textcol_negate.checkbox('Check for Negation')
##########################################
### Checkboxes for Compare with labels ###
##########################################
if isinstance(text_input, (dict, list)) and 'entities' in text_input[0].keys():
    state_compare = False
    compare = textcol_compare.checkbox('Compare between predictions and labels', disabled=state_compare)
else:
    state_compare, compare = True, False
###############################
### Processing for negation ###
###############################
if negate:
    neg_ent = {"ent_types": list(selected_model.get_pipe('ner').labels)}
    neg = negation(selected_model, neg_ent)
    doc = infer_negation(neg, selected_model, infer_input, doc)
    selected_entities += ['NEG']
    ent_colors_map.update({'NEG': '#C7C7C7'})
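# negation / infer_negation (from src.negation) are assumed to wrap a
# negation detector (e.g. a negspaCy-style component) over the listed entity
# types and to re-tag negated spans on the doc with the extra 'NEG' label,
# which is why 'NEG' is appended to the visible entities and colour map here.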
################################
### Processing for Comparison ##
################################
if compare and isinstance(text_input, (dict, list)):
    infer_input = text_input[sample - 1] if sample is not None else text_input[0]
    tokens_compare = process_text_compare(infer_input, selected_entities, colors=ent_colors_map)

tokens = process_text(doc, selected_entities, colors=ent_colors_map)
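# process_text / process_text_compare are assumed to return the mixed list
# annotated_text() consumes: plain strings for unlabelled stretches and
# (text, label, color) tuples for entity spans.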
st.markdown('##')
# Display results
st.markdown('#### Predictions')
annotated_text(*tokens)
if compare and isinstance(text_input, (dict, list)):
    st.markdown('#### Labels')
    annotated_text(*tokens_compare)
st.markdown("---")

data = pd.DataFrame([{'label': entity.label_, 'text': entity.text,
                      'start': entity.start, 'end': entity.end}
                     for entity in doc.ents])
if data.shape[1] > 0:
    st.table(data['label'].value_counts())
    myexpander = st.expander('Details on text')
    myexpander.table(data)
####################################
#### Inference on whole dataset ####
####################################
infer_whole_dataset = st.checkbox('Inference on whole dataset')
if isinstance(text_input, (dict, list)) and infer_whole_dataset:
    texts = [record['text'] for record in text_input]
    st.markdown('### Prediction on whole dataset')
    inference_data = inference(selected_model, texts)

    ### Applying negation to whole dataset
    if negate:
        neg_ent = {"ent_types": list(selected_model.get_pipe('ner').labels)}
        neg = negation(selected_model, neg_ent)
        docs = selected_model.pipe(texts, batch_size=8)
        records = []
        for no, doc in enumerate(docs):
            doc = infer_negation(neg, selected_model, texts[no], doc)
            if len(doc.ents) > 0:
                records.append([{'id': no + 1, 'text': doc.text, 'span': entity.text,
                                 'entity': entity.label_, 'start': entity.start, 'end': entity.end}
                                for entity in doc.ents])
            else:
                records.append([{'id': no + 1, 'text': doc.text, 'span': None,
                                 'entity': None, 'start': None, 'end': None}])
        # Flatten the per-doc record lists and index by (text, id)
        inference_data = pd.DataFrame(sum(records, [])).set_index(['text', 'id'])

    st.download_button(
        label="Download Prediction as CSV",
        data=inference_data.to_csv().encode('utf-8'),
        file_name='inference_data.csv',
        mime='text/csv',
    )
    #########################################
    ### Expander for dataframe and report ###
    #########################################
    report_expander = st.expander('Report on Evaluation Results')
    # eval_spacy is expected to return spaCy Scorer-style metrics
    # (ents_p / ents_r / ents_f and per-label scores under ents_per_type)
    results_metrics = eval_spacy(selected_model, text_input)
    overall_score = pd.DataFrame({'Type': ['Overall'],
                                  'Precision': [results_metrics['ents_p']],
                                  'Recall': [results_metrics['ents_r']],
                                  'F1': [results_metrics['ents_f']]})
    overall_score = overall_score.set_index('Type')
    entities_score = pd.DataFrame.from_dict(results_metrics['ents_per_type']).T
    entities_score = entities_score.rename(columns={'p': 'Precision', 'r': 'Recall', 'f': 'F1'})
    report_expander.table(overall_score)
    report_expander.table(entities_score)

    df_expander = st.expander('Inference Table')
    df_expander.write(inference_data.to_html(), unsafe_allow_html=True)
    # df_expander.table(inference_data)