Spaces:

shaimaa1
/

newproject

Sleeping

App Files Files Community

shaimaa1 committed 20 days ago

Commit

57eb1ae

verified ·

1 Parent(s): 5087b52

Create app.py

Browse files

Files changed (1) hide show

app.py +83 -0

app.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import streamlit as st
+from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
+import torch
+from PyPDF2 import PdfReader
+# Load the summarization pipeline (Hugging Face model)
+st.subheader("File Summarization Tool")
+# Check if GPU is available
+device = 0 if torch.cuda.is_available() else -1
+# Use a more general model loading approach for better error handling
+try:
+    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
+    #summarizer = pipeline("summarization", model="facebook/bart-large", device=device)
+except Exception as e:
+    st.error(f"Error loading model: {str(e)}")
+    summarizer = None
+# Function to extract text from a PDF file
+def extract_text_from_pdf(pdf_file):
+    reader = PdfReader(pdf_file)
+    text = ""
+    for page_num in range(len(reader.pages)):
+        page = reader.pages[page_num]
+        text += page.extract_text()
+    return text
+# Function to extract text from a TXT file
+def extract_text_from_txt(txt_file):
+    return txt_file.read().decode("utf-8")
+# Streamlit file uploader
+file = st.file_uploader("Upload a PDF or TXT file", type=["pdf", "txt"])
+# Text input area for user-provided text
+user_text = st.text_area("Or write your text here:", "")
+if (file or user_text) and summarizer:
+    try:
+        # Extract text based on input type
+        if file:
+            if file.type == "application/pdf":
+                text = extract_text_from_pdf(file)
+            elif file.type == "text/plain":
+                text = extract_text_from_txt(file)
+            else:
+                st.error("Unsupported file type.")
+                text = ""
+        else:
+            text = user_text
+        if len(text) > 0:
+            # Function to split the text into chunks of a fixed size
+            def split_text_into_chunks(text, chunk_size=512):
+                words = text.split()
+                for i in range(0, len(words), chunk_size):
+                    yield " ".join(words[i:i + chunk_size])
+            # Split the text into chunks
+            chunks = list(split_text_into_chunks(text))
+            summaries = []
+            # Summarize each chunk
+            for chunk in chunks:
+                summarized_chunk = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
+                summaries.append(summarized_chunk[0]['summary_text'])
+            # Combine summaries from all chunks
+            summary = " ".join(summaries)
+            # Display the summary
+            st.subheader("Summary")
+            st.write(summary)
+        else:
+            st.warning("No text could be extracted from the file or provided by the user.")
+    except Exception as e:
+        st.error(f"An error occurred during summarization: {str(e)}")