pszemraj committed on
Commit
a96d344
·
1 Parent(s): 9792c03

🎨 format

Browse files

Signed-off-by: peter szemraj <[email protected]>

Files changed (2) hide show
  1. app.py +3 -1
  2. pdf2text.py +7 -2
app.py CHANGED
@@ -146,7 +146,9 @@ if __name__ == "__main__":
146
 
147
  gr.Markdown("## Load Inputs")
148
  gr.Markdown("Upload your own file & replace the default")
149
- gr.Markdown("_If no file is uploaded, a sample PDF will be used. PDFs are truncated to 20 pages._")
 
 
150
 
151
  uploaded_file = gr.File(
152
  label="Upload a PDF file",
 
146
 
147
  gr.Markdown("## Load Inputs")
148
  gr.Markdown("Upload your own file & replace the default")
149
+ gr.Markdown(
150
+ "_If no file is uploaded, a sample PDF will be used. PDFs are truncated to 20 pages._"
151
+ )
152
 
153
  uploaded_file = gr.File(
154
  label="Upload a PDF file",
pdf2text.py CHANGED
@@ -32,12 +32,12 @@ from spellchecker import SpellChecker
32
  from tqdm.auto import tqdm
33
 
34
 
35
-
36
  def simple_rename(filepath, target_ext=".txt"):
37
  _fp = Path(filepath)
38
  basename = _fp.stem
39
  return f"OCR_{basename}_{target_ext}"
40
 
 
41
  def rm_local_text_files(name_contains="RESULT_"):
42
  """
43
  rm_local_text_files - remove local text files
@@ -45,12 +45,17 @@ def rm_local_text_files(name_contains="RESULT_"):
45
  Args:
46
  name_contains (str, optional): [description]. Defaults to "OCR_".
47
  """
48
- files = [f for f in Path.cwd().iterdir() if f.is_file() and f.suffix == '.txt' and name_contains in f.name]
 
 
 
 
49
  logging.info(f"removing {len(files)} text files")
50
  for f in files:
51
  os.remove(f)
52
  logging.info("done")
53
 
 
54
  def corr(
55
  s: str,
56
  add_space_when_numerics=False,
 
32
  from tqdm.auto import tqdm
33
 
34
 
 
35
  def simple_rename(filepath, target_ext=".txt"):
36
  _fp = Path(filepath)
37
  basename = _fp.stem
38
  return f"OCR_{basename}_{target_ext}"
39
 
40
+
41
  def rm_local_text_files(name_contains="RESULT_"):
42
  """
43
  rm_local_text_files - remove local text files
 
45
  Args:
46
  name_contains (str, optional): [description]. Defaults to "OCR_".
47
  """
48
+ files = [
49
+ f
50
+ for f in Path.cwd().iterdir()
51
+ if f.is_file() and f.suffix == ".txt" and name_contains in f.name
52
+ ]
53
  logging.info(f"removing {len(files)} text files")
54
  for f in files:
55
  os.remove(f)
56
  logging.info("done")
57
 
58
+
59
  def corr(
60
  s: str,
61
  add_space_when_numerics=False,