Mistral-7B-Summarizer-v2

Sleeping

Chan-Y committed on Jun 14, 2024

Commit

026783f

verified ·

1 Parent(s): f5d655c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import warnings
 warnings.simplefilter(action='ignore', category=FutureWarning)
 import gradio as gr
 from langchain.prompts import PromptTemplate
 from langchain.chains.summarize import load_summarize_chain
@@ -18,11 +19,22 @@ llm = HuggingFaceEndpoint(
 )
 llm_engine_hf = ChatHuggingFace(llm=llm)
 def summarize(file, n_words):
     # Read the content of the uploaded file
     file_path = file.name
-    with open(file_path, 'r', encoding='utf-8') as f:
-        file_content = f.read()
     document = Document(file_content)
     # Generate the summary
     text = document.page_content

 import warnings
 warnings.simplefilter(action='ignore', category=FutureWarning)
+import PyPDF2
 import gradio as gr
 from langchain.prompts import PromptTemplate
 from langchain.chains.summarize import load_summarize_chain
 )
 llm_engine_hf = ChatHuggingFace(llm=llm)
+def read_pdf(file_path):
+    """Return the text of every page of a PDF, concatenated in page order.
+
+    Args:
+        file_path: Path of the PDF file; passed straight to
+            ``PyPDF2.PdfReader``.
+
+    Returns:
+        A single string holding the extracted text of all pages, with no
+        separator inserted between pages. Empty if nothing is extracted.
+    """
+    pdf_reader = PyPDF2.PdfReader(file_path)
+    # `or ""` keeps the join from failing if a page yields no text
+    # (NOTE(review): some PyPDF2 releases may return None for pages
+    # without a text layer -- confirm against the pinned version).
+    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
 def summarize(file, n_words):
     # Read the content of the uploaded file
     file_path = file.name
+    if file_path.endswith('.pdf'):
+        file_content = read_pdf(file_path)
+    else:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            file_content = f.read()
     document = Document(file_content)
     # Generate the summary
     text = document.page_content