Chan-Y committed on
Commit
a43e6d5
1 Parent(s): 65033bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -11
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import warnings
2
  warnings.simplefilter(action='ignore', category=FutureWarning)
 
 
 
3
 
4
  import fitz
5
  import gradio as gr
@@ -20,7 +23,7 @@ llm = HuggingFaceEndpoint(
20
  llm_engine_hf = ChatHuggingFace(llm=llm)
21
 
22
  def read_pdf(file_path):
23
- print("It is a PDF file")
24
  try:
25
  pdf_document = fitz.open(file_path)
26
  text = ""
@@ -28,15 +31,28 @@ def read_pdf(file_path):
28
  page = pdf_document[page_num]
29
  text += page.get_text()
30
 
 
 
 
 
 
31
  return text
 
32
  except Exception as e:
33
- print("Error reading file,", e)
 
 
34
 
35
  def read_txt(file_path):
36
- print("It is not a PDF file")
37
- with open(file_path, "r", encoding="utf-8") as f:
38
- text = f.read()
39
- return text
 
 
 
 
 
40
 
41
  def summarize(file, n_words):
42
  global llm
@@ -47,11 +63,8 @@ def summarize(file, n_words):
47
  else:
48
  text = read_txt(file_path)
49
 
50
- print("Length of text is ", len(text))
51
- if len(text) > CONTEXT_WINDOW:
52
- print(f"Slicing the first {CONTEXT_WINDOW} characters")
53
- text = text[:CONTEXT_WINDOW]
54
-
55
  lang = detect(text[:CONTEXT_WINDOW])
56
  template_translate = '''
57
  Please carefully read the following document:
@@ -74,6 +87,8 @@ The summary should be in {LANG} language.
74
  formatted_prompt = prompt_summarize.format(TEXT=chunk, LANG=lang)
75
  summary = llm.invoke(formatted_prompt)
76
  summaries.append(summary)
 
 
77
 
78
  final_summary = "\n\n".join(summaries)
79
  return final_summary
 
1
  import warnings
2
  warnings.simplefilter(action='ignore', category=FutureWarning)
3
+ import logging
4
+ logging.basicConfig(level=logging.INFO)
5
+ logger = logging.getLogger(__name__)
6
 
7
  import fitz
8
  import gradio as gr
 
23
  llm_engine_hf = ChatHuggingFace(llm=llm)
24
 
25
  def read_pdf(file_path):
26
+ logger.info("Reading a PDF file")
27
  try:
28
  pdf_document = fitz.open(file_path)
29
  text = ""
 
31
  page = pdf_document[page_num]
32
  text += page.get_text()
33
 
34
+ if not text.strip():
35
+ message = "PDF contains no text. It may be due to the PDF being password-protected, collapsed, or full of images."
36
+ logger.info(message)
37
+ return message
38
+
39
  return text
40
+
41
  except Exception as e:
42
+ error_message = f"Error reading PDF file: {e}"
43
+ logger.error(error_message)
44
+ return error_message
45
 
46
  def read_txt(file_path):
47
+ logger.info("Reading a TXT file")
48
+ try:
49
+ with open(file_path, "r", encoding="utf-8") as f:
50
+ text = f.read()
51
+ return text
52
+ except Exception as e:
53
+ error_message = f"Error reading TXT file: {e}"
54
+ logger.error(error_message)
55
+ return error_message
56
 
57
  def summarize(file, n_words):
58
  global llm
 
63
  else:
64
  text = read_txt(file_path)
65
 
66
+ logger.info("Length of text is %d", len(text))
67
+
 
 
 
68
  lang = detect(text[:CONTEXT_WINDOW])
69
  template_translate = '''
70
  Please carefully read the following document:
 
87
  formatted_prompt = prompt_summarize.format(TEXT=chunk, LANG=lang)
88
  summary = llm.invoke(formatted_prompt)
89
  summaries.append(summary)
90
+
91
+ logger.info(f"Chunked into {len(summaries)}.")
92
 
93
  final_summary = "\n\n".join(summaries)
94
  return final_summary