cogniveon's picture
Update app.py
c2f4003 verified
raw
history blame
No virus
1.57 kB
import gradio as gr
from transformers import pipeline
# Token-classification pipeline loaded from the
# "cogniveon/nlpcw_bert-base-uncased-abbr" checkpoint.
# `aggregation_strategy="simple"` is the current spelling of the deprecated
# `grouped_entities=True` flag: results are returned as grouped entities that
# carry an "entity_group" label together with "start"/"end" character offsets.
pipe = pipeline(
    "token-classification",
    model="cogniveon/nlpcw_bert-base-uncased-abbr",
    aggregation_strategy="simple",
)
def predict(input) -> list[tuple[str, str | float | None]] | dict | None:
    """Split *input* into labelled and unlabelled segments for highlighting.

    The text is run through the module-level ``pipe`` and then cut at the
    character offsets of each returned entity. The result alternates between
    plain text (label ``None``) and entity text (label taken from the
    entity's ``"entity_group"``), which is the list-of-tuples form passed to
    the ``gr.HighlightedText`` output component.

    Args:
        input: The raw text to analyse. The name shadows the builtin but is
            kept so existing callers are unaffected.

    Returns:
        A list of ``(text, label)`` tuples covering the whole of *input* in
        order. When the pipeline reports no entities the list contains a
        single ``(input, None)`` segment instead of raising ``IndexError``.
    """
    highlighted_text = []
    cursor = 0  # Offset of the first character not yet emitted.

    # NOTE(review): assumes the pipeline yields entities in ascending,
    # non-overlapping offset order - confirm against the pipeline docs.
    for entity in pipe(input):
        start, end = entity["start"], entity["end"]
        # Unlabelled text between the previous entity (or the start of the
        # input) and this entity.
        highlighted_text.append((input[cursor:start], None))
        highlighted_text.append((input[start:end], entity["entity_group"]))
        cursor = end

    # Remaining text after the last entity, or the entire input when no
    # entity was found.
    highlighted_text.append((input[cursor:], None))
    return highlighted_text
# Build the UI first and launch it in a separate statement so that `demo`
# refers to the Interface object itself rather than to the return value of
# `.launch()`.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(
        label="Input",
        lines=3,
    ),
    outputs=gr.HighlightedText(
        label="Output",
        combine_adjacent=True,
        show_legend=True,
    ),
    examples=[
        ["We developed a variant of gene set enrichment analysis (GSEA) to determine whether a genetic pathway shows evidence for age regulation [23]."],
    ],
)

demo.launch()