import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import pipeline

st.subheader("File Summarization Tool")

# Use the GPU if one is available, otherwise fall back to the CPU
device = 0 if torch.cuda.is_available() else -1

# Load the summarization pipeline (Hugging Face model); wrap the load in
# try/except so a download or loading failure is reported in the UI
# instead of crashing the app
try:
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
    #summarizer = pipeline("summarization", model="facebook/bart-large", device=device)
except Exception as e:
st.error(f"Error loading model: {str(e)}")
summarizer = None
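
# Optional (not in the original app): Streamlit reruns this script on every
# interaction, so the model above is reloaded each time. A minimal sketch of
# caching the pipeline with Streamlit's st.cache_resource decorator:
#
#     @st.cache_resource
#     def load_summarizer():
#         return pipeline("summarization",
#                         model="sshleifer/distilbart-cnn-12-6", device=device)
#
#     summarizer = load_summarizer()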

# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    reader = PdfReader(pdf_file)
    text = ""
    for page in reader.pages:
        # extract_text() returns None for pages with no extractable text
        # (e.g. scanned images), so guard against concatenating None
        text += page.extract_text() or ""
    return text

# Function to extract text from a TXT file
def extract_text_from_txt(txt_file):
    return txt_file.read().decode("utf-8")
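
# Optional (not in the original app): a file that is not valid UTF-8 makes the
# decode above raise UnicodeDecodeError; a more forgiving variant would be
#     txt_file.read().decode("utf-8", errors="replace")
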
# Streamlit file uploader
file = st.file_uploader("Upload a PDF or TXT file", type=["pdf", "txt"])
# Text input area for user-provided text
user_text = st.text_area("Or write your text here:", "")

if (file or user_text) and summarizer:
    try:
        # Extract text based on input type
        if file:
            if file.type == "application/pdf":
                text = extract_text_from_pdf(file)
            elif file.type == "text/plain":
                text = extract_text_from_txt(file)
            else:
                st.error("Unsupported file type.")
                text = ""
        else:
            text = user_text

        if text.strip():
            # Function to split the text into chunks of a fixed size, so each
            # piece stays within what the model can summarize at once
            def split_text_into_chunks(text, chunk_size=512):
                words = text.split()
                for i in range(0, len(words), chunk_size):
                    yield " ".join(words[i:i + chunk_size])

            # Split the text into chunks
            chunks = list(split_text_into_chunks(text))
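            # Note: chunk_size counts words, which is only a rough proxy for
            # the model's 1024-token input limit; an exact split would count
            # tokens with the model's tokenizer instead.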

            summaries = []

            # Summarize each chunk and collect the partial summaries
            for chunk in chunks:
                summarized_chunk = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
                summaries.append(summarized_chunk[0]['summary_text'])

            # Combine summaries from all chunks
            summary = " ".join(summaries)

            # Display the summary
            st.subheader("Summary")
            st.write(summary)
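
            # Optional (not in the original app): Streamlit's download_button
            # lets the user save the combined summary as a plain-text file
            st.download_button("Download summary", summary, file_name="summary.txt")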
        else:
            st.warning("No text could be extracted from the file or provided by the user.")
    except Exception as e:
        st.error(f"An error occurred during summarization: {str(e)}")