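# Minimal Gradio demo: run the EHRI Holocaust NER model over free text and
# return a mapping from each detected word to its predicted entity label.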
import gradio as gr
from transformers import pipeline, AutoTokenizer
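# Multilingual NER model for Holocaust-related texts, published by EHRI on the
# Hugging Face Hub (an XLM-RoBERTa-large fine-tune).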
model_name = 'ehri-ner/xlm-roberta-large-ehri-ner-all'
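# Token-classification pipeline; without an aggregation strategy it emits one
# prediction per sub-word token, so predictions are merged back into words below.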
ner_model = pipeline('ner', model=model_name)
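# Tokenizer for the same checkpoint (the pipeline also tokenizes internally).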
tokenizer = AutoTokenizer.from_pretrained(model_name)
def predict(text):
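    """Map each detected word in ``text`` to its predicted entity label."""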
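    # Sub-word tokens of the input; not used below, since the pipeline tokenizes internally.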
    tokens = tokenizer.tokenize(text)
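    # Run the model: one prediction dict per sub-word token, with 'word' and 'entity' keys.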
    entities = ner_model(text)
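    # Rebuild full words from sub-word pieces and map each to its entity label.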
    result = {}
    for entity in entities:
        word = entity['word']
        entity_type = entity['entity']
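        # XLM-RoBERTa uses a SentencePiece tokenizer: tokens starting with '▁'
        # begin a new word, while tokens without it continue the previous word.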
        if word.startswith('▁') or not result:
            result[word.lstrip('▁')] = entity_type
        else:
            last_word = list(result.keys())[-1]
            result[last_word + word] = result.pop(last_word)
    return result
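# Simple web UI: a text box in, the word-to-entity mapping out as JSON.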
iface = gr.Interface(fn=predict,
                     inputs=gr.Textbox(lines=2, placeholder='Enter text here...'),
                     outputs='json')
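# Start the local web server for the demo.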
iface.launch()