Spaces:

cogniveon
/

nlp_groupcw_demo

Sleeping

nlp_groupcw_demo / app.py

Update app.py

c2f4003 verified 5 months ago

No virus

1.57 kB

	import gradio as gr
	from transformers import pipeline

	# Token-classification pipeline used by predict() to tag the input text.
	# aggregation_strategy="simple" is the current spelling of the deprecated
	# grouped_entities=True flag: results are merged into entity groups, which is
	# why predict() reads an "entity_group" key from each result.
	pipe = pipeline(
	    "token-classification",
	    model="cogniveon/nlpcw_bert-base-uncased-abbr",
	    aggregation_strategy="simple",
	)

	def predict(input) -> list[tuple[str, str | float | None]] | dict | None:
	    """Tag *input* with the module-level ``pipe`` and return highlight segments.

	    The result is a list of ``(text, label)`` tuples that together cover the
	    whole input string in order: spans reported by the pipeline carry their
	    ``entity_group`` as the label, and the text between, before and after
	    them carries ``None``.

	    Args:
	        input: The text to analyse.

	    Returns:
	        ``None`` when *input* is empty, otherwise the list of segments. When
	        the pipeline reports no entities the list holds the whole input as a
	        single unlabelled segment.
	    """
	    # Nothing to tag; skip the model call entirely.
	    if not input:
	        return None

	    highlighted_text = []
	    cursor = 0  # index in `input` just past the last emitted entity

	    # NOTE(review): assumes the pipeline yields entities in ascending,
	    # non-overlapping order of "start" -- the same assumption the previous
	    # implementation made.
	    for entity in pipe(input):
	        start, end = entity["start"], entity["end"]
	        # Unlabelled text between the previous entity (or the beginning of
	        # the input) and this one.
	        highlighted_text.append((input[cursor:start], None))
	        highlighted_text.append((input[start:end], entity["entity_group"]))
	        cursor = end

	    # Remaining text after the last entity. With no entities at all this is
	    # the entire input, so an empty result no longer raises IndexError.
	    highlighted_text.append((input[cursor:], None))

	    return highlighted_text


	# Gradio UI: a three-line text box feeding predict(), rendered as highlighted
	# text with a legend for the entity labels.
	demo = gr.Interface(
	    fn=predict,
	    inputs=gr.Textbox(
	        label="Input",
	        lines=3,
	    ),
	    outputs=gr.HighlightedText(
	        label="Output",
	        combine_adjacent=True,
	        show_legend=True,
	    ),
	    examples=[
	        ["We developed a variant of gene set enrichment analysis (GSEA) to determine whether a genetic pathway shows evidence for age regulation [23]."],
	    ],
	)

	# Launch as a separate statement so `demo` stays bound to the Interface
	# rather than to whatever launch() returns.
	demo.launch()