Spaces:
Sleeping
Sleeping
langdonholmes
committed on
Commit
•
e3f8caf
1
Parent(s):
5eb2119
Removing unused labels from spacy_recognizer.py
Browse files
app.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
|
2 |
"""Streamlit app for Student Name Detection models."""
|
3 |
|
4 |
-
import spacy
|
5 |
from spacy_recognizer import CustomSpacyRecognizer
|
6 |
from presidio_analyzer.nlp_engine import NlpEngineProvider
|
7 |
from presidio_anonymizer import AnonymizerEngine
|
@@ -46,18 +45,15 @@ def analyzer_engine():
|
|
46 |
|
47 |
return analyzer
|
48 |
|
49 |
-
|
50 |
@st.cache(allow_output_mutation=True)
|
51 |
def anonymizer_engine():
|
52 |
"""Return AnonymizerEngine."""
|
53 |
return AnonymizerEngine()
|
54 |
|
55 |
-
|
56 |
def get_supported_entities():
|
57 |
"""Return supported entities from the Analyzer Engine."""
|
58 |
return analyzer_engine().get_supported_entities()
|
59 |
|
60 |
-
|
61 |
def analyze(**kwargs):
|
62 |
"""Analyze input using Analyzer engine and input arguments (kwargs)."""
|
63 |
if "entities" not in kwargs or "All" in kwargs["entities"]:
|
@@ -79,11 +75,9 @@ def anonymize(text, analyze_results):
|
|
79 |
text,
|
80 |
analyze_results,
|
81 |
operators={"STUDENT": OperatorConfig("custom", {"lambda": generate_surrogate})}
|
82 |
-
|
83 |
)
|
84 |
return res.text
|
85 |
|
86 |
-
|
87 |
def annotate(text, st_analyze_results, st_entities):
|
88 |
tokens = []
|
89 |
# sort by start index
|
@@ -129,7 +123,6 @@ st.sidebar.info(
|
|
129 |
"This is part of a deidentification project for student-generated text."
|
130 |
)
|
131 |
|
132 |
-
|
133 |
# Main panel
|
134 |
analyzer_load_state = st.info(
|
135 |
"Starting Presidio analyzer and loading Longformer-based model...")
|
@@ -162,6 +155,7 @@ with st.spinner("Analyzing..."):
|
|
162 |
annotated_tokens = annotate(st_text, st_analyze_results, st_entities)
|
163 |
# annotated_tokens
|
164 |
annotated_text(*annotated_tokens)
|
|
|
165 |
# vertical space
|
166 |
st.text("")
|
167 |
|
@@ -172,7 +166,6 @@ with st.spinner("Anonymizing..."):
|
|
172 |
st_anonymize_results = anonymize(st_text, st_analyze_results)
|
173 |
st_anonymize_results
|
174 |
|
175 |
-
|
176 |
# table result
|
177 |
st.subheader("Detailed Findings")
|
178 |
if st_analyze_results:
|
@@ -197,8 +190,6 @@ else:
|
|
197 |
st.session_state['first_load'] = True
|
198 |
|
199 |
# json result
|
200 |
-
|
201 |
-
|
202 |
class ToDictListEncoder(JSONEncoder):
|
203 |
"""Encode dict to json."""
|
204 |
|
@@ -208,6 +199,5 @@ class ToDictListEncoder(JSONEncoder):
|
|
208 |
return o.to_dict()
|
209 |
return []
|
210 |
|
211 |
-
|
212 |
if st_return_decision_process:
|
213 |
st.json(json.dumps(st_analyze_results, cls=ToDictListEncoder))
|
|
|
1 |
|
2 |
"""Streamlit app for Student Name Detection models."""
|
3 |
|
|
|
4 |
from spacy_recognizer import CustomSpacyRecognizer
|
5 |
from presidio_analyzer.nlp_engine import NlpEngineProvider
|
6 |
from presidio_anonymizer import AnonymizerEngine
|
|
|
45 |
|
46 |
return analyzer
|
47 |
|
|
|
48 |
@st.cache(allow_output_mutation=True)
|
49 |
def anonymizer_engine():
|
50 |
"""Return AnonymizerEngine."""
|
51 |
return AnonymizerEngine()
|
52 |
|
|
|
53 |
def get_supported_entities():
|
54 |
"""Return supported entities from the Analyzer Engine."""
|
55 |
return analyzer_engine().get_supported_entities()
|
56 |
|
|
|
57 |
def analyze(**kwargs):
|
58 |
"""Analyze input using Analyzer engine and input arguments (kwargs)."""
|
59 |
if "entities" not in kwargs or "All" in kwargs["entities"]:
|
|
|
75 |
text,
|
76 |
analyze_results,
|
77 |
operators={"STUDENT": OperatorConfig("custom", {"lambda": generate_surrogate})}
|
|
|
78 |
)
|
79 |
return res.text
|
80 |
|
|
|
81 |
def annotate(text, st_analyze_results, st_entities):
|
82 |
tokens = []
|
83 |
# sort by start index
|
|
|
123 |
"This is part of a deidentification project for student-generated text."
|
124 |
)
|
125 |
|
|
|
126 |
# Main panel
|
127 |
analyzer_load_state = st.info(
|
128 |
"Starting Presidio analyzer and loading Longformer-based model...")
|
|
|
155 |
annotated_tokens = annotate(st_text, st_analyze_results, st_entities)
|
156 |
# annotated_tokens
|
157 |
annotated_text(*annotated_tokens)
|
158 |
+
|
159 |
# vertical space
|
160 |
st.text("")
|
161 |
|
|
|
166 |
st_anonymize_results = anonymize(st_text, st_analyze_results)
|
167 |
st_anonymize_results
|
168 |
|
|
|
169 |
# table result
|
170 |
st.subheader("Detailed Findings")
|
171 |
if st_analyze_results:
|
|
|
190 |
st.session_state['first_load'] = True
|
191 |
|
192 |
# json result
|
|
|
|
|
193 |
class ToDictListEncoder(JSONEncoder):
|
194 |
"""Encode dict to json."""
|
195 |
|
|
|
199 |
return o.to_dict()
|
200 |
return []
|
201 |
|
|
|
202 |
if st_return_decision_process:
|
203 |
st.json(json.dumps(st_analyze_results, cls=ToDictListEncoder))
|