edithram23 committed on
Commit
908eafd
1 Parent(s): 67ff28f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -5,8 +5,10 @@ import fitz # PyMuPDF
5
  from docx import Document
6
  import re
7
  import nltk
 
8
  nltk.download('punkt')
9
 
 
10
  def sentence_tokenize(text):
11
  sentences = nltk.sent_tokenize(text)
12
  return sentences
@@ -28,7 +30,6 @@ model_large = AutoModelForSeq2SeqLM.from_pretrained(model_dir_large)
28
  # pattern = r'\[.*?\]'
29
  # redacted_text = re.sub(pattern, '[redacted]', predicted_title)
30
  # return redacted_text
31
- from presidio_analyzer import AnalyzerEngine, PatternRecognizer, RecognizerResult, Pattern
32
 
33
  # Initialize the analyzer engine
34
  analyzer = AnalyzerEngine()
 
5
  from docx import Document
6
  import re
7
  import nltk
8
+ from presidio_analyzer import AnalyzerEngine, PatternRecognizer, RecognizerResult, Pattern
9
  nltk.download('punkt')
10
 
11
+
12
  def sentence_tokenize(text):
13
  sentences = nltk.sent_tokenize(text)
14
  return sentences
 
30
  # pattern = r'\[.*?\]'
31
  # redacted_text = re.sub(pattern, '[redacted]', predicted_title)
32
  # return redacted_text
 
33
 
34
  # Initialize the analyzer engine
35
  analyzer = AnalyzerEngine()