langdonholmes committed
Commit e3f8caf
1 Parent(s): 5eb2119

Removing unused labels from spacy_recognizer.py

Files changed (1)
  1. app.py +1 -11
app.py CHANGED
@@ -1,7 +1,6 @@
 
 """Streamlit app for Student Name Detection models."""
 
-import spacy
 from spacy_recognizer import CustomSpacyRecognizer
 from presidio_analyzer.nlp_engine import NlpEngineProvider
 from presidio_anonymizer import AnonymizerEngine
@@ -46,18 +45,15 @@ def analyzer_engine():
 
     return analyzer
 
-
 @st.cache(allow_output_mutation=True)
 def anonymizer_engine():
     """Return AnonymizerEngine."""
     return AnonymizerEngine()
 
-
 def get_supported_entities():
     """Return supported entities from the Analyzer Engine."""
     return analyzer_engine().get_supported_entities()
 
-
 def analyze(**kwargs):
     """Analyze input using Analyzer engine and input arguments (kwargs)."""
     if "entities" not in kwargs or "All" in kwargs["entities"]:
@@ -79,11 +75,9 @@ def anonymize(text, analyze_results):
         text,
         analyze_results,
         operators={"STUDENT": OperatorConfig("custom", {"lambda": generate_surrogate})}
-
     )
     return res.text
 
-
 def annotate(text, st_analyze_results, st_entities):
     tokens = []
     # sort by start index
@@ -129,7 +123,6 @@ st.sidebar.info(
     "This is part of a deidentification project for student-generated text."
 )
 
-
 # Main panel
 analyzer_load_state = st.info(
     "Starting Presidio analyzer and loading Longformer-based model...")
@@ -162,6 +155,7 @@ with st.spinner("Analyzing..."):
     annotated_tokens = annotate(st_text, st_analyze_results, st_entities)
     # annotated_tokens
     annotated_text(*annotated_tokens)
+
 # vertical space
 st.text("")
 
@@ -172,7 +166,6 @@ with st.spinner("Anonymizing..."):
     st_anonymize_results = anonymize(st_text, st_analyze_results)
     st_anonymize_results
 
-
 # table result
 st.subheader("Detailed Findings")
 if st_analyze_results:
@@ -197,8 +190,6 @@ else:
     st.session_state['first_load'] = True
 
 # json result
-
-
 class ToDictListEncoder(JSONEncoder):
     """Encode dict to json."""
 
@@ -208,6 +199,5 @@ class ToDictListEncoder(JSONEncoder):
             return o.to_dict()
         return []
 
-
 if st_return_decision_process:
     st.json(json.dumps(st_analyze_results, cls=ToDictListEncoder))
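For context on the custom operator wired up in anonymize() above, here is a minimal, self-contained sketch of the same Presidio pattern. The generate_surrogate body and the sample RecognizerResult offsets are illustrative stand-ins, not taken from this repository.

# Minimal sketch of the custom-operator pattern used in anonymize().
# generate_surrogate below is a placeholder; app.py supplies its own logic.
from presidio_analyzer import RecognizerResult
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig


def generate_surrogate(original_text: str) -> str:
    """Return a replacement for a detected student name (placeholder)."""
    return "Jane Doe"


engine = AnonymizerEngine()

# Pretend the analyzer flagged characters 11-16 ("Aaron") as a STUDENT entity.
results = [RecognizerResult(entity_type="STUDENT", start=11, end=16, score=0.9)]

anonymized = engine.anonymize(
    text="My name is Aaron and I wrote this essay.",
    analyzer_results=results,
    operators={"STUDENT": OperatorConfig("custom", {"lambda": generate_surrogate})},
)
print(anonymized.text)  # My name is Jane Doe and I wrote this essay.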
 
1
 
2
  """Streamlit app for Student Name Detection models."""
3
 
 
4
  from spacy_recognizer import CustomSpacyRecognizer
5
  from presidio_analyzer.nlp_engine import NlpEngineProvider
6
  from presidio_anonymizer import AnonymizerEngine
 
45
 
46
  return analyzer
47
 
 
48
  @st.cache(allow_output_mutation=True)
49
  def anonymizer_engine():
50
  """Return AnonymizerEngine."""
51
  return AnonymizerEngine()
52
 
 
53
  def get_supported_entities():
54
  """Return supported entities from the Analyzer Engine."""
55
  return analyzer_engine().get_supported_entities()
56
 
 
57
  def analyze(**kwargs):
58
  """Analyze input using Analyzer engine and input arguments (kwargs)."""
59
  if "entities" not in kwargs or "All" in kwargs["entities"]:
 
75
  text,
76
  analyze_results,
77
  operators={"STUDENT": OperatorConfig("custom", {"lambda": generate_surrogate})}
 
78
  )
79
  return res.text
80
 
 
81
  def annotate(text, st_analyze_results, st_entities):
82
  tokens = []
83
  # sort by start index
 
123
  "This is part of a deidentification project for student-generated text."
124
  )
125
 
 
126
  # Main panel
127
  analyzer_load_state = st.info(
128
  "Starting Presidio analyzer and loading Longformer-based model...")
 
155
  annotated_tokens = annotate(st_text, st_analyze_results, st_entities)
156
  # annotated_tokens
157
  annotated_text(*annotated_tokens)
158
+
159
  # vertical space
160
  st.text("")
161
 
 
166
  st_anonymize_results = anonymize(st_text, st_analyze_results)
167
  st_anonymize_results
168
 
 
169
  # table result
170
  st.subheader("Detailed Findings")
171
  if st_analyze_results:
 
190
  st.session_state['first_load'] = True
191
 
192
  # json result
 
 
193
  class ToDictListEncoder(JSONEncoder):
194
  """Encode dict to json."""
195
 
 
199
  return o.to_dict()
200
  return []
201
 
 
202
  if st_return_decision_process:
203
  st.json(json.dumps(st_analyze_results, cls=ToDictListEncoder))
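Similarly, a short sketch of how the ToDictListEncoder at the end of app.py lets st.json() display analyzer results: json.dumps() falls back to the encoder's default(), which calls to_dict() on each RecognizerResult. The sample result below is fabricated for illustration.

# Sketch: serializing Presidio analyzer results with the encoder from app.py.
import json
from json import JSONEncoder

from presidio_analyzer import RecognizerResult


class ToDictListEncoder(JSONEncoder):
    """Encode dict to json."""

    def default(self, o):
        """Convert object to dict."""
        if o:
            return o.to_dict()
        return []


# A fabricated result; in the app these come from analyzer_engine().analyze().
results = [RecognizerResult(entity_type="STUDENT", start=11, end=16, score=0.9)]
print(json.dumps(results, cls=ToDictListEncoder))
# Prints a JSON list of dicts with entity_type, start, end, and score fields.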