import ast
import os

import pandas as pd
import streamlit as st
import yaml
from annotated_text import annotated_text

from src.app_utils import *
from src.inference import inference
from src.negation import *
from src.trainers import eval_spacy

#### Loading configuration and models ####
with open('./st_config.yaml', 'r') as yamlfile:
    args = yaml.load(yamlfile, Loader=yaml.FullLoader)
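# Illustrative st_config.yaml sketch (assumed values — only the keys are taken
# from what this script actually reads: model_dir, default_models, examples,
# and colors_palette):
#
#   model_dir: ./models              # folder of locally trained pipelines, or null
#   default_models:                  # packaged model names to offer, or null
#     - en_core_web_sm               # hypothetical example
#   examples:                        # example texts keyed by name, or null
#     demo_note: "No evidence of pneumonia."
#   colors_palette:                  # one hex color per entity label
#     - "#8DD3C7"
#     - "#FFFFB3"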
if args['model_dir'] is None:
    model_names_dir = []
elif os.path.exists(args['model_dir']):
    model_names_dir = os.listdir(args['model_dir'])
else:
    model_names_dir = []

model_names = model_names_dir + args['default_models'] if args['default_models'] is not None else model_names_dir

st.title('NER Visualizer')

###################################
#### Sidebar (Choose Model) #######
###################################
model_name = st.sidebar.selectbox("Select a model", options=model_names)
if len(model_names) > 0:
    models = load_models(model_names, args, model_names_dir)
    selected_model = models[model_name]

###################################
#### Sidebar (Choose Example) #####
###################################
st.sidebar.markdown('###')
if args['examples'] is not None:
    chosen_note = st.sidebar.selectbox("Select an example text", options=args['examples'].keys())
else:
    chosen_note = None

if chosen_note == "radiology_eval_dataset":
    text_input = pd.read_csv("./eval_35.csv", converters={'entities': ast.literal_eval})
    text_input = text_input.to_dict('records')

# Map each available entity label to a display color
if len(model_names) > 0:
    ents_available = selected_model.get_pipe('ner').labels
    ent_colors_map = dict(zip(ents_available, args['colors_palette'][:len(ents_available)]))

#################
### Text area ###
#################
if chosen_note != "radiology_eval_dataset":
    text_input = st.text_area("Type notes in the box below",
                              value=args['examples'][chosen_note] if args['examples'] is not None else '')

st.markdown("---")

############################
### Sidebar (Load Files) ###
############################
st.sidebar.info('For csv & json files, name the text column to be inferred "text" '
                'and the annotated labels "entities". Format json text as below.')
st.sidebar.json([{"text": "example", "entities": [[5, 6, "do"], [8, 11, "dx"]]},
                 {"text": "example2", "entities": [[5, 6, "do"], [8, 11, "dx"]]}],
                expanded=False)
uploaded_file = st.sidebar.file_uploader("Upload a file", type=["csv", "json", "pdf", "txt"])
text_input = process_files(uploaded_file, text_input)

#################################
### Sidebar (Select Entities) ###
#################################
selected_entities = st.sidebar.multiselect(
    "Select the entities you want to view",
    options=ents_available if len(model_names) > 0 else [],
    default=ents_available if len(model_names) > 0 else [],
)

##########################
### Text Area (Slider) ###
##########################
if len(text_input) > 1 and isinstance(text_input, (list, dict)):
    sample = st.slider('Select Example', min_value=1, max_value=len(text_input))
else:
    sample = None

# Run the selected pipeline over the chosen document
if len(model_names) > 0:
    infer_input = text_input[sample - 1]["text"] if sample is not None else text_input
    doc = selected_model(infer_input)

textcol_negate, textcol_compare = st.columns([1, 1])

# Checkbox for negation detection
negate = textcol_negate.checkbox('Check for Negation')

#########################################
### Checkbox to compare against labels ###
#########################################
if isinstance(text_input, (dict, list)) and 'entities' in text_input[0].keys():
    state_compare = False
    compare = textcol_compare.checkbox('Compare between predictions and labels', disabled=state_compare)
else:
    state_compare, compare = True, False

###############################
### Processing for negation ###
###############################
if negate:
    neg_ent = {"ent_types": list(selected_model.get_pipe('ner').labels)}
    neg = negation(selected_model, neg_ent)
    doc = infer_negation(neg, selected_model, infer_input, doc)
    selected_entities += ['NEG']
    ent_colors_map.update({'NEG': '#C7C7C7'})

#################################
### Processing for comparison ###
#################################
if compare and isinstance(text_input, (dict, list)):
    infer_input = text_input[sample - 1]
    tokens_compare = process_text_compare(infer_input, selected_entities, colors=ent_colors_map)

tokens = process_text(doc, selected_entities, colors=ent_colors_map)

st.markdown('##')

# Display results
st.markdown('#### Predictions')
annotated_text(*tokens)

if compare and isinstance(text_input, (dict, list)):
    st.markdown('#### Labels')
    annotated_text(*tokens_compare)

st.markdown("---")

data = pd.DataFrame.from_dict([{'label': entity.label_, 'text': entity.text,
                                'start': entity.start, 'end': entity.end}
                               for entity in doc.ents])
if data.shape[1] > 0:
    st.table(data['label'].value_counts())
    myexpander = st.expander('Details on text')
    myexpander.table(data)

####################################
#### Inference on whole dataset ####
####################################
infer_whole_dataset = st.checkbox('Inference on whole dataset')
if isinstance(text_input, (dict, list)) and infer_whole_dataset:
    texts = [record['text'] for record in text_input]

    st.markdown('### Prediction on whole dataset')
    inference_data = inference(selected_model, texts)
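    # The negation pass below re-runs the spaCy pipeline over every text and
    # flattens the predictions into long format: one row per entity, plus a
    # single all-None row for any document without entities.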
    ### Applying negation to the whole dataset
    if negate:
        neg_ent = {"ent_types": list(selected_model.get_pipe('ner').labels)}
        neg = negation(selected_model, neg_ent)
        docs = selected_model.pipe(texts, batch_size=8)
        records = []
        for no, doc in enumerate(docs):
            doc = infer_negation(neg, selected_model, texts[no], doc)
            if len(doc.ents) > 0:
                records.append([{'id': no + 1, 'text': doc.text, 'span': entity.text,
                                 'entity': entity.label_, 'start': entity.start, 'end': entity.end}
                                for entity in doc.ents])
            else:
                records.append([{'id': no + 1, 'text': doc.text, 'span': None,
                                 'entity': None, 'start': None, 'end': None}])
        inference_data = pd.DataFrame.from_dict(sum(records, [])).set_index(['text', 'id'])

    st.download_button(
        label="Download Prediction as CSV",
        data=inference_data.to_csv().encode('utf-8'),
        file_name='inference_data.csv',
        mime='text/csv',
    )

    #########################################
    ### Expander for dataframe and report ###
    #########################################
    report_expander = st.expander('Report on Evaluation Results')
    results_metrics = eval_spacy(selected_model, text_input)
    overall_score = pd.DataFrame.from_dict({'Type': ['Overall'],
                                            'Precision': [results_metrics['ents_p']],
                                            'Recall': [results_metrics['ents_r']],
                                            'F1': [results_metrics['ents_f']]})
    overall_score = overall_score.set_index('Type')
    entities_score = pd.DataFrame.from_dict(results_metrics['ents_per_type']).T
    entities_score = entities_score.rename(columns={'p': 'Precision', 'r': 'Recall', 'f': 'F1'})
    report_expander.table(overall_score)
    report_expander.table(entities_score)

    df_expander = st.expander('Inference Table')
    df_expander.write(inference_data.to_html(), unsafe_allow_html=True)
    # df_expander.table(inference_data)
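
# To launch the app (assuming this script is saved as app.py; the filename is
# an assumption, not given here):
#   streamlit run app.py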