langdonholmes committed on
Commit
287a33f
•
1 Parent(s): 5806da1

functioning

Browse files
anonymize.py → anonymizer.py RENAMED
@@ -26,7 +26,12 @@ def anonymize(
26
  res = anonymizer.anonymize(
27
  text,
28
  analyze_results,
29
- operators={"STUDENT": OperatorConfig("custom", {"lambda": generate_surrogate})}
 
 
 
 
 
30
  )
31
 
32
  return res.text
 
26
  res = anonymizer.anonymize(
27
  text,
28
  analyze_results,
29
+ operators={
30
+ "STUDENT": OperatorConfig("custom", {"lambda": generate_surrogate}),
31
+ "EMAIL_ADDRESS": OperatorConfig("replace", {"new_value": "[email protected]"}),
32
+ "PHONE_NUMBER": OperatorConfig("replace", {"new_value": "888-888-8888"}),
33
+ "URL": OperatorConfig("replace", {"new_value": "aol.com"}),
34
+ }
35
  )
36
 
37
  return res.text
app.py CHANGED
@@ -2,6 +2,7 @@
2
  """Streamlit app for Student Name Detection models."""
3
 
4
  from spacy_analyzer import prepare_analyzer
 
5
  from presidio_anonymizer import AnonymizerEngine
6
  import pandas as pd
7
  from annotated_text import annotated_text
@@ -33,7 +34,7 @@ def anonymizer_engine():
33
  """Return AnonymizerEngine."""
34
  return AnonymizerEngine()
35
 
36
- def annotate(text, st_analyze_results):
37
  tokens = []
38
  # sort by start index
39
  results = sorted(st_analyze_results, key=lambda x: x.start)
@@ -63,8 +64,8 @@ st.sidebar.markdown(
63
 
64
  st_entities = st.sidebar.multiselect(
65
  label="Which entities to look for?",
66
- options=get_supported_entities(),
67
- default=list(get_supported_entities()),
68
  )
69
 
70
  st_threshold = st.sidebar.slider(
@@ -100,7 +101,7 @@ if 'first_load' not in st.session_state:
100
  st.subheader("Analyzed")
101
  with st.spinner("Analyzing..."):
102
  if button or st.session_state.first_load:
103
- st_analyze_results = analyze(
104
  text=st_text,
105
  entities=st_entities,
106
  language="en",
@@ -118,9 +119,11 @@ st.subheader("Anonymized")
118
 
119
  with st.spinner("Anonymizing..."):
120
  if button or st.session_state.first_load:
121
- st_anonymize_results = anonymize(st_text, st_analyze_results)
 
 
122
  st_anonymize_results
123
-
124
  # table result
125
  st.subheader("Detailed Findings")
126
  if st_analyze_results:
 
2
  """Streamlit app for Student Name Detection models."""
3
 
4
  from spacy_analyzer import prepare_analyzer
5
+ from anonymizer import anonymize
6
  from presidio_anonymizer import AnonymizerEngine
7
  import pandas as pd
8
  from annotated_text import annotated_text
 
34
  """Return AnonymizerEngine."""
35
  return AnonymizerEngine()
36
 
37
+ def annotate(text, st_analyze_results, st_entities):
38
  tokens = []
39
  # sort by start index
40
  results = sorted(st_analyze_results, key=lambda x: x.start)
 
64
 
65
  st_entities = st.sidebar.multiselect(
66
  label="Which entities to look for?",
67
+ options=analyzer_engine().get_supported_entities(),
68
+ default=list(analyzer_engine().get_supported_entities()),
69
  )
70
 
71
  st_threshold = st.sidebar.slider(
 
101
  st.subheader("Analyzed")
102
  with st.spinner("Analyzing..."):
103
  if button or st.session_state.first_load:
104
+ st_analyze_results = analyzer_engine().analyze(
105
  text=st_text,
106
  entities=st_entities,
107
  language="en",
 
119
 
120
  with st.spinner("Anonymizing..."):
121
  if button or st.session_state.first_load:
122
+ st_anonymize_results = anonymize(anonymizer_engine(),
123
+ st_text,
124
+ st_analyze_results)
125
  st_anonymize_results
126
+
127
  # table result
128
  st.subheader("Detailed Findings")
129
  if st_analyze_results:
spacy_analyzer.py CHANGED
@@ -1,6 +1,7 @@
1
 
2
  from presidio_analyzer import (
3
  AnalyzerEngine,
 
4
  RecognizerRegistry,
5
  LocalRecognizer,
6
  AnalysisExplanation,
@@ -72,6 +73,7 @@ class CustomSpacyRecognizer(LocalRecognizer):
72
  return explanation
73
 
74
  def analyze(self,
 
75
  entities: list[str] = None,
76
  nlp_artifacts: NlpArtifacts = None):
77
  """Analyze input using Analyzer engine and input arguments (kwargs)."""
 
1
 
2
  from presidio_analyzer import (
3
  AnalyzerEngine,
4
+ RecognizerResult,
5
  RecognizerRegistry,
6
  LocalRecognizer,
7
  AnalysisExplanation,
 
73
  return explanation
74
 
75
  def analyze(self,
76
+ text: str,
77
  entities: list[str] = None,
78
  nlp_artifacts: NlpArtifacts = None):
79
  """Analyze input using Analyzer engine and input arguments (kwargs)."""