"""Gradio demo: named-entity recognition with a Hugging Face pipeline.

The app shows, for a user-supplied sentence, the tokens produced by the
model's tokenizer and the entities predicted by the NER pipeline.
"""

# Gradio
import gradio as gr

# Hugging Face libraries
from transformers import pipeline
from transformers import AutoTokenizer

# Model checkpoint
model_checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"

# Entities whose score is below this value are left out of the report.
MIN_SCORE = 0.8

# Instantiate the pipeline
ner_task = pipeline(
    model=model_checkpoint,
    task="ner",
    aggregation_strategy="simple",
)

# Instantiate the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Sample sentences
sentence1 = "Herbert Akroyd Stuart patented the first diesel engine, 1890"
# Adjacent literals are used instead of a backslash continuation inside the
# string, so the text no longer depends on the indentation of the next line.
sentence2 = (
    "May 10 A delegation tells Leopold III his return would be "
    "illtimed, 1945"
)
sentence3 = "Fri May 10 Fred Astaire (Frederick Austerlitz) born in Omaha, Nebraska, 1899"
sentence4 = "Fri May 10 Germany invades Low Countries, 1940"
sentence5 = "Fri May 10 Nazi bookburning, 1933"
sentence6 = "Fri May 10 Confederate Memorial Day in South Carolina"
sentence7 = "Fri May 10 Mothers Day in Guatemala"
sentence8 = "Fri May 10 Dave Mason is born in Worcester, England, 1945"


def _format_entities(entities, min_score=MIN_SCORE):
    """Render the pipeline's entity dicts as a human-readable report.

    Args:
        entities: Iterable of dicts, each read via the keys ``score``,
            ``entity_group`` and ``word``.
        min_score: Entities with ``score`` below this value are skipped.

    Returns:
        One text section per kept entity (numbered from 1, followed by the
        raw dict), then a final line with the number of kept entities.
    """
    sections = []
    for entity in entities:
        if entity["score"] < min_score:
            continue
        # Number only the entities that are kept, starting from 1 for
        # intuitive reading.
        number = len(sections) + 1
        sections.append(
            f"Number: {number} \n"
            f"Entity: {entity['entity_group']}\n"
            f"Word group: {entity['word']}\n"
            f"Score: {entity['score']}\n"
            f"{entity}\n\n"
        )
    summary = f"Number of predicted entities: {len(sections)}\n\n"
    return "".join(sections) + summary


# Gradio interface
def predict(sentence: str) -> tuple:
    """Tokenize the sentence and predict its named entities.

    Args:
        sentence: Text entered by the user.

    Returns:
        A ``(token_pieces, formatted_ner)`` pair: the tokens the tokenizer
        produced for the sentence, and a text report of the predicted
        entities whose score is at least ``MIN_SCORE``.
    """
    # Get the tokens from the tokenizer
    token_pieces = tokenizer(sentence).tokens()

    # Get the prediction of ner from the model
    result_ner = ner_task(sentence)

    return token_pieces, _format_entities(result_ner)


# Main Gradio interface
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.TextArea(
            label="Place your sentence here",
            lines=10,
            show_copy_button=True,
        ),
    ],
    outputs=[
        gr.TextArea(
            label="Tokens input to the model",
            interactive=False,
            lines=10,
            show_copy_button=True,
        ),
        gr.TextArea(
            label="Prediction of entities",
            interactive=False,
            lines=10,
            show_copy_button=True,
        ),
    ],
    examples=[
        [sentence1],
        [sentence2],
        [sentence3],
        [sentence4],
        [sentence5],
        [sentence6],
        [sentence7],
        [sentence8],
    ],
    title="NER (Named Entities Recognition)",
    description=f"""
## Using model {model_checkpoint} to predict entities type

Notes:

""",
)

# Launch only when run as a script, so importing this module (for example to
# reuse `predict` or `demo`) does not start a web server.
if __name__ == "__main__":
    demo.launch()