"""Gradio demo: run the EHRI named-entity-recognition model on free text."""

import gradio as gr
from transformers import AutoTokenizer, pipeline

# Specify the name of the model on the Hugging Face Hub
model_name = 'ehri-ner/xlm-roberta-large-ehri-ner-all'

# Load the tokenizer once and hand it to the pipeline so it is not
# instantiated a second time inside ``pipeline``.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model from Hugging Face.
# ``aggregation_strategy='simple'`` makes the pipeline merge sub-word pieces
# (and adjacent tokens carrying the same entity tag) back into readable text.
# Doing this by hand with a '##' prefix check does not work here: this is an
# XLM-RoBERTa model whose SentencePiece tokens mark word starts with '▁'
# instead of marking continuations with '##'.
ner_model = pipeline(
    'ner',
    model=model_name,
    tokenizer=tokenizer,
    aggregation_strategy='simple',
)


def predict(text):
    """Return the named entities found in *text*.

    Args:
        text: The raw input string to analyse.

    Returns:
        A dict mapping each recognised entity's surface text to its entity
        label (the pipeline's ``entity_group``, i.e. the tag without a
        ``B-``/``I-`` prefix). An empty dict is returned for empty or
        whitespace-only input. Because the result is keyed by entity text,
        an entity that occurs several times appears once, with the label of
        its last occurrence.
    """
    # Nothing to analyse; skip the model call entirely.
    if not text or not text.strip():
        return {}

    # Each aggregated entity already carries the merged surface string in
    # 'word' and the grouped label in 'entity_group'.
    return {
        entity['word'].strip(): entity['entity_group']
        for entity in ner_model(text)
    }


# Define the Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder='Enter text here...'),
    outputs='json',
)

# Launch the interface only when executed as a script, so importing this
# module does not start a web server.
if __name__ == '__main__':
    iface.launch()