# ner-analyzer / app.py
import os
import ast  # needed below for the 'entities' converter when reading eval_35.csv
import yaml
import pandas as pd
import streamlit as st
from annotated_text import annotated_text
# The wildcard imports are expected to provide the helper functions used below
# (load_models, process_files, process_text, process_text_compare,
#  negation, infer_negation).
from src.negation import *
from src.app_utils import *
from src.inference import inference
from src.trainers import eval_spacy
#### Loading configuration and models ####
with open('./st_config.yaml', 'r') as yamlfile:
    args = yaml.load(yamlfile, Loader=yaml.FullLoader)

if args['model_dir'] is not None and os.path.exists(args['model_dir']):
    model_names_dir = os.listdir(args['model_dir'])
else:
    model_names_dir = []
model_names = model_names_dir + args['default_models'] if args['default_models'] is not None else model_names_dir
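# For reference, a minimal sketch of the st_config.yaml this app reads; the
# keys match the lookups in this file, the values are placeholders only:
#
#   model_dir: ./models                  # folder of local spaCy models (or null)
#   default_models: [en_core_web_sm]
#   examples:
#     radiology_eval_dataset: null       # special-cased below to load eval_35.csv
#     sample_note: "Patient denies chest pain or shortness of breath."
#   colors_palette: ["#8ef", "#faa", "#afa", "#fea"]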
st.title('NER Visualizer')

##################################
#### Sidebar (Choose Model) ######
##################################
model_name = st.sidebar.selectbox("Select a model", options=model_names)
if len(model_names) > 0:
    models = load_models(model_names, args, model_names_dir)
    selected_model = models[model_name]
##################################
#### Sidebar (Choose Example) ####
##################################
st.sidebar.markdown('###')
if args['examples'] is not None:
    chosen_note = st.sidebar.selectbox("Select an example text", options=args['examples'].keys())
else:
    chosen_note = None

if chosen_note == "radiology_eval_dataset":
    text_input = pd.read_csv("./eval_35.csv", converters={'entities': ast.literal_eval})
    text_input = text_input.to_dict('records')
# Set a display colour for each entity type the model can predict
if len(model_names) > 0:
    ents_available = selected_model.get_pipe('ner').labels
    ent_colors_map = dict(zip(ents_available, args['colors_palette']))
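    # Illustrative shape of the mapping (labels depend on the selected model,
    # colours on colors_palette in st_config.yaml), e.g. {'do': '#8ef', 'dx': '#faa'}.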
##################
### Text area ###
##################
if chosen_note != "radiology_eval_dataset":
    text_input = st.text_area("Type notes in the box below",
                              value=args['examples'][chosen_note] if args['examples'] is not None else '')
st.markdown("---")
############################
### Sidebar (Load Files) ###
############################
st.sidebar.info('For csv & json files, name the text column to be inferred "text" '
                'and the annotated-labels column "entities". JSON format as below.')
st.sidebar.json([{"text": "example", "entities": [[5, 6, "do"], [8, 11, "dx"]]},
                 {"text": "example2", "entities": [[5, 6, "do"], [8, 11, "dx"]]}], expanded=False)
uploaded_file = st.sidebar.file_uploader("Upload a file", type=["csv","json","pdf", "txt"])
text_input = process_files(uploaded_file, text_input)
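# process_files (from src.app_utils) presumably returns either plain text (str)
# or a list of {'text': ..., 'entities': ...} records; the isinstance checks
# further down branch on exactly that shape.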
#################################
### Sidebar (Select Entities) ###
#################################
selected_entities = st.sidebar.multiselect(
    "Select the entities you want to view",
    options=ents_available if len(model_names) > 0 else [],
    default=ents_available if len(model_names) > 0 else [],
)
##########################
### Text Area (Slider) ###
##########################
if isinstance(text_input, (list, dict)) and len(text_input) > 1:
    sample = st.slider('Select Example', min_value=1, max_value=len(text_input))
else:
    sample = None
# Run the selected model over the chosen document
if len(model_names) > 0:
    if sample is not None:
        infer_input = text_input[sample - 1]["text"]
    elif isinstance(text_input, list):
        infer_input = text_input[0]["text"]  # single-record dataset, no slider shown
    else:
        infer_input = text_input
    doc = selected_model(infer_input)

    textcol_negate, textcol_compare = st.columns([1, 1])
    # Checkbox for negation
    negate = textcol_negate.checkbox('Check for Negation')
    ##########################################
    ### Checkboxes for Compare with labels ###
    ##########################################
    # Offer the comparison checkbox only when the input carries gold labels
    if isinstance(text_input, (dict, list)) and 'entities' in text_input[0]:
        compare = textcol_compare.checkbox('Compare between predictions and labels')
    else:
        compare = False
    ###############################
    ### Processing for negation ###
    ###############################
    if negate:
        neg_ent = {"ent_types": list(selected_model.get_pipe('ner').labels)}
        neg = negation(selected_model, neg_ent)
        doc = infer_negation(neg, selected_model, infer_input, doc)
        selected_entities += ['NEG']
        ent_colors_map.update({'NEG': '#C7C7C7'})  # grey for negated spans
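    # infer_negation (from src.negation) is expected to re-tag negated entity
    # spans under the extra 'NEG' label registered above.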
    ################################
    ### Processing for comparison ##
    ################################
    if compare and isinstance(text_input, (dict, list)):
        infer_input = text_input[sample - 1] if sample is not None else text_input[0]
        tokens_compare = process_text_compare(infer_input, selected_entities, colors=ent_colors_map)
    tokens = process_text(doc, selected_entities, colors=ent_colors_map)

    st.markdown('##')
    # Display results
    st.markdown('#### Predictions')
    annotated_text(*tokens)
    if compare and isinstance(text_input, (dict, list)):
        st.markdown('#### Labels')
        annotated_text(*tokens_compare)
    st.markdown("---")
    data = pd.DataFrame.from_dict([{'label': entity.label_, 'text': entity.text,
                                    'start': entity.start, 'end': entity.end}
                                   for entity in doc.ents])
    if data.shape[1] > 0:
        st.table(data['label'].value_counts())
        myexpander = st.expander('Details on text')
        myexpander.table(data)
    ####################################
    #### Inference on whole dataset ####
    ####################################
    infer_whole_dataset = st.checkbox('Inference on whole dataset')
    if isinstance(text_input, (dict, list)) and infer_whole_dataset:
        texts = [record['text'] for record in text_input]
        st.markdown('### Prediction on whole dataset')
        inference_data = inference(selected_model, texts)
        ### Applying negation to the whole dataset
        if negate:
            neg_ent = {"ent_types": list(selected_model.get_pipe('ner').labels)}
            neg = negation(selected_model, neg_ent)
            docs = selected_model.pipe(texts, batch_size=8)
            records = []
            for no, doc in enumerate(docs):
                doc = infer_negation(neg, selected_model, texts[no], doc)
                if len(doc.ents) > 0:
                    records.append([{'id': no + 1, 'text': doc.text, 'span': entity.text,
                                     'entity': entity.label_, 'start': entity.start, 'end': entity.end}
                                    for entity in doc.ents])
                else:
                    records.append([{'id': no + 1, 'text': doc.text, 'span': None,
                                     'entity': None, 'start': None, 'end': None}])
            inference_data = pd.DataFrame.from_dict(sum(records, [])).set_index(['text', 'id'])
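            # The resulting frame is long-format: one row per predicted entity
            # (a single all-None row when a document has none), indexed by
            # (text, id) so the CSV download below groups rows by document.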
        st.download_button(
            label="Download Prediction as CSV",
            data=inference_data.to_csv().encode('utf-8'),
            file_name='inference_data.csv',
            mime='text/csv',
        )
        #########################################
        ### Expander for dataframe and report ###
        #########################################
        report_expander = st.expander('Report on Evaluation Results')
        results_metrics = eval_spacy(selected_model, text_input)
        overall_score = pd.DataFrame.from_dict({'Type': ['Overall'],
                                                'Precision': [results_metrics['ents_p']],
                                                'Recall': [results_metrics['ents_r']],
                                                'F1': [results_metrics['ents_f']]})
        overall_score = overall_score.set_index('Type')
        entities_score = pd.DataFrame.from_dict(results_metrics['ents_per_type']).T
        entities_score = entities_score.rename(columns={'p': 'Precision', 'r': 'Recall', 'f': 'F1'})
        report_expander.table(overall_score)
        report_expander.table(entities_score)

        df_expander = st.expander('Inference Table')
        df_expander.write(inference_data.to_html(), unsafe_allow_html=True)
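
# Launch locally with the standard Streamlit entry point:
#   streamlit run app.py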