Ashish Soni committed on
Commit
cb27a07
·
verified ·
1 Parent(s): 47cf71e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -9
app.py CHANGED
@@ -1,11 +1,25 @@
1
  import gradio as gr
 
2
  from transformers import pipeline
 
3
 
4
- def merge_tokens(tokens):
 
 
 
 
 
 
 
 
 
 
 
 
5
  merged_tokens = []
6
  for token in tokens:
7
  if merged_tokens and token['entity'].startswith('I-') and merged_tokens[-1]['entity'].endswith(token['entity'][2:]):
8
- # If current token continues the entity of the last one, merge them
9
  last_token = merged_tokens[-1]
10
  last_token['word'] += token['word'].replace('##', '')
11
  last_token['end'] = token['end']
@@ -16,19 +30,51 @@ def merge_tokens(tokens):
16
 
17
  return merged_tokens
18
 
19
- get_completion = pipeline("ner", model="dslim/bert-base-NER")
 
 
 
 
 
 
 
 
 
 
20
 
21
- def ner(input):
 
 
 
 
 
 
22
  output = get_completion(input)
23
  merged_tokens = merge_tokens(output)
24
  return {"text": input, "entities": merged_tokens}
25
 
26
- gr.close_all()
27
- demo = gr.Interface(fn=ner,
28
- inputs=[gr.Textbox(label="Text to find entities", lines=2)],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  outputs=[gr.HighlightedText(label="Text with entities")],
30
- title="NER with dslim/bert-base-NER",
31
- description="Find entities using the `dslim/bert-base-NER` model under the hood!",
32
  allow_flagging="never",
33
  examples=["My name is Andrew, I'm building DeeplearningAI and I live in California", "My name is Poli, I live in Vienna and work at HuggingFace"])
34
 
 
1
  import gradio as gr
2
+ import spaces
3
  from transformers import pipeline
4
+ from typing import List, Dict, Any
5
 
6
+ def merge_tokens(tokens: List[Dict[str, any]]) -> List[Dict[str, any]]:
7
+ """
8
+ Merges tokens that belong to the same entity into a single token.
9
+
10
+ Args:
11
+ tokens (List[Dict[str, any]]): A list of token dictionaries, each containing information about
12
+ the entity, word, start, end, and score.
13
+
14
+ Returns:
15
+ List[Dict[str, any]]: A list of merged token dictionaries, where tokens that are part of the
16
+ same entity are combined into a single token with updated word, end,
17
+ and score values.
18
+ """
19
  merged_tokens = []
20
  for token in tokens:
21
  if merged_tokens and token['entity'].startswith('I-') and merged_tokens[-1]['entity'].endswith(token['entity'][2:]):
22
+ # If the current token continues the entity of the last one, merge them
23
  last_token = merged_tokens[-1]
24
  last_token['word'] += token['word'].replace('##', '')
25
  last_token['end'] = token['end']
 
30
 
31
  return merged_tokens
32
 
33
+ # Initialize Model
34
+ get_completion = pipeline("ner", model="dslim/bert-base-NER", device=0)
35
+
36
+ @spaces.GPU(duration=120)
37
+ def ner(input: str) -> Dict[str, Any]:
38
+ """
39
+ Performs Named Entity Recognition (NER) on the given input text and merges tokens that belong
40
+ to the same entity into a single entity.
41
+
42
+ Args:
43
+ input (str): The input text to analyze for named entities.
44
 
45
+ Returns:
46
+ Dict[str, Any]: A dictionary containing the original text and a list of identified entities
47
+ with merged tokens.
48
+ - "text": The original input text.
49
+ - "entities": A list of dictionaries, where each dictionary contains information
50
+ about a recognized entity, including the word, entity type, score, and positions.
51
+ """
52
  output = get_completion(input)
53
  merged_tokens = merge_tokens(output)
54
  return {"text": input, "entities": merged_tokens}
55
 
56
+ ####### GRADIO APP #######
57
+ title = """<h1 id="title"> Named Entity Recognition </h1>"""
58
+
59
+ description = """
60
+ - The model used for Recognizing entities [BERT-BASE-NER](https://huggingface.co/dslim/bert-base-NER).
61
+ """
62
+
63
+ css = '''
64
+ h1#title {
65
+ text-align: center;
66
+ }
67
+ '''
68
+
69
+ theme = gr.themes.Soft()
70
+ demo = gr.Blocks(css=css, theme=theme)
71
+
72
+ with demo:
73
+ gr.Markdown(title)
74
+ gr.Markdown(description)
75
+ interface = gr.Interface(fn=ner,
76
+ inputs=[gr.Textbox(label="Text to find entities", lines=10)],
77
  outputs=[gr.HighlightedText(label="Text with entities")],
 
 
78
  allow_flagging="never",
79
  examples=["My name is Andrew, I'm building DeeplearningAI and I live in California", "My name is Poli, I live in Vienna and work at HuggingFace"])
80