"""Gradio demo that highlights the entity spans found by a token classifier."""

import gradio as gr
from transformers import pipeline

# `aggregation_strategy="simple"` is the current spelling of the deprecated
# `grouped_entities=True`: sub-word tokens are merged into whole entity spans.
pipe = pipeline(
    "token-classification",
    model="cogniveon/nlpcw_bert-base-uncased-abbr",
    aggregation_strategy="simple",
)


def predict(input) -> list[tuple[str, str | float | None]] | dict | None:
    """Split *input* into labelled and unlabelled segments for HighlightedText.

    Args:
        input: The raw text entered by the user.

    Returns:
        A list of ``(text, label)`` tuples covering the whole input in order.
        Entity spans carry their ``entity_group`` as the label; the text
        before, between and after them carries ``None``. When the model finds
        no entities the whole input is returned as one unlabelled segment.
        ``None`` is returned for empty input, without calling the model.
    """
    if not input:
        return None

    highlighted_text = []
    cursor = 0  # End offset of the last emitted entity (0 before the first).

    for entity in pipe(input):
        start, end = entity["start"], entity["end"]
        # Unlabelled text between the previous entity (or the beginning of
        # the input) and this one; emitted even when it is empty.
        highlighted_text.append((input[cursor:start], None))
        highlighted_text.append((input[start:end], entity["entity_group"]))
        cursor = end

    # Remaining text after the last entity. With no entities at all the
    # cursor is still 0, so this is the entire input.
    highlighted_text.append((input[cursor:], None))
    return highlighted_text


demo = gr.Interface(
    predict,
    gr.Textbox(
        label="Input",
        lines=3,
    ),
    gr.HighlightedText(
        label="Output",
        combine_adjacent=True,
        show_legend=True,
    ),
    examples=[
        ["We developed a variant of gene set enrichment analysis (GSEA) to determine whether a genetic pathway shows evidence for age regulation [23]."],
    ],
).launch()