Spaces:
Sleeping
Sleeping
File size: 1,565 Bytes
ccb67fa 8bd15cf ccb67fa 8bd15cf c2f4003 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import gradio as gr
from transformers import pipeline
# Load the token-classification model once at import time so every request
# reuses the same pipeline. aggregation_strategy="simple" groups sub-word
# tokens into whole entity spans; it replaces the deprecated
# grouped_entities=True flag, which the library maps to this same strategy.
pipe = pipeline(
    "token-classification",
    model="cogniveon/nlpcw_bert-base-uncased-abbr",
    aggregation_strategy="simple",
)
def predict(input) -> list[tuple[str, str | float | None]] | dict | None:
    """Tag entities in *input* and return segments for ``gr.HighlightedText``.

    The text is run through the module-level ``pipe`` and split into
    consecutive ``(text, label)`` tuples: entity spans carry their
    ``entity_group`` label, and the text between them carries ``None``.

    Args:
        input: The raw text entered by the user.

    Returns:
        A list of ``(substring, label_or_None)`` tuples covering the whole
        input in order. If the model finds no entities the entire text is
        returned as one unlabelled segment; empty input yields an empty list.
    """
    # Nothing to tag: skip the model call entirely.
    if not input:
        return []

    segments = []
    cursor = 0  # index just past the last character already emitted

    # Walk the entities in the order the pipeline reports them, emitting the
    # unlabelled gap before each one and then the labelled span itself.
    # NOTE(review): assumes spans arrive sorted by position and do not
    # overlap - confirm against the pipeline's aggregation output.
    for entity in pipe(input):
        start, end = entity["start"], entity["end"]
        segments.append((input[cursor:start], None))
        segments.append((input[start:end], entity["entity_group"]))
        cursor = end

    # Remaining text after the last entity. When no entity was found this is
    # the whole input, which avoids the IndexError the old entities[0]
    # lookup raised.
    segments.append((input[cursor:], None))
    return segments
# Build the UI: a multi-line text box feeding predict(), with the result
# rendered as highlighted spans and a legend of the entity labels.
demo = gr.Interface(
    predict,
    gr.Textbox(
        label="Input",
        lines=3,
    ),
    gr.HighlightedText(
        label="Output",
        combine_adjacent=True,
        show_legend=True,
    ),
    examples=[
        ["We developed a variant of gene set enrichment analysis (GSEA) to determine whether a genetic pathway shows evidence for age regulation [23]."],
    ],
)

# Launch as a separate statement so that `demo` stays bound to the Interface
# object rather than to the return value of launch().
demo.launch()