Spaces:

liewchooichin
/

ner_from_pretrained

Runtime error

App Files Files Community

liewchooichin committed on May 10, 2024

Commit

7f12d56

verified ·

1 Parent(s): aaff802

Create app.py

Browse files

Files changed (1) hide show

app.py +80 -0

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+# Gradio
+import gradio as gr
+# Hugging Face libraries
+from transformers import pipeline
+from transformers import AutoTokenizer
+# Model checkpoint
+model_checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"
+# Instantiate the pipeline
+ner_task = pipeline(model=model_checkpoint, task="ner",
+        aggregation_strategy="simple")
+# Instantiate the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+# Sample sentences
+sentence1 = "Herbert Akroyd Stuart patented the first diesel engine, 1890"
+sentence2 = "May 10 A delegation tells Leopold III his return would be \
+illtimed, 1945"
+sentence3 = "Fri May 10 Fred Astaire (Frederick Austerlitz) born in Omaha, Nebraska, 1899"
+sentence4 = "Fri May 10 Germany invades Low Countries, 1940"
+sentence5 = "Fri May 10 Nazi bookburning, 1933"
+sentence6 = "Fri May 10 Confederate Memorial Day in South Carolina"
+sentence7 = "Fri May 10 Mothers Day in Guatemala"
+sentence8 = "Fri May 10 Dave Mason is born in Worcester, England, 1945"
+# Gradio interface
+def predict(sentence):
+    """
+    Use the corresponding tokenizer to tokenize the sentence.
+    Use the model to predict the entities.
+    """
+    # Get the tokens from the tokenizer
+    processed_tokens = tokenizer(sentence)
+    token_pieces = processed_tokens.tokens()
+    # Get the prediction of ner from the model
+    result_ner = ner_task(sentence)
+    formatted_ner = f"Number of predicted entities: {len(result_ner)}\n\n"
+    # Print individual entities.
+    # Start the count from 1 for intuitive reading.
+    for i, result in enumerate(result_ner):
+        formatted_ner +=   f"Number: {i+1} \n" \
+                         + f"Entity: {result['entity_group']}\n" \
+                         + f"Word group: {result['word']}\n" \
+                         + f"Score: {result['score']}\n"
+        formatted_ner += f"{result}\n\n"
+    return token_pieces, formatted_ner
+# Main Gradio interface
+demo = gr.Interface(
+    fn = predict,
+    inputs = [gr.TextArea(label="Place your sentence here", lines=10,
+                          show_copy_button=True)],
+    outputs =
+        [
+            gr.TextArea(label="Tokens input to the model", interactive=False,
+                        lines=10, show_copy_button=True),
+            gr.TextArea(label="Prediction of entities", interactive=False,
+                        lines=10, show_copy_button=True)
+        ],
+    examples=[[sentence1], [sentence2], [sentence3], [sentence4],
+              [sentence5], [sentence6], [sentence7], [sentence8]],
+    title = "NER (Named Entities Recognition)",
+    description = f"""
+         ## Using model {model_checkpoint} to predict entities type
+         <p style="font-size: 1.2rem;">Notes: </p>
+         <ul style="font-size: 1.2rem; list-style-type:square">
+         <li>  The examples are from the calendar utility in Linux.
+         <li>  The model cannot recognize date and time.
+         <li> It can recongize PER (person), LOC (location), ORG (organization) and MIS (miscellaneous)
+         entities.
+         </ul>
+         """
+)
+demo.launch()