"""Streamlit app that summarizes an uploaded PDF/TXT file or pasted text.

The input text is split into fixed-size word chunks, each chunk is run
through a Hugging Face summarization pipeline, and the per-chunk summaries
are joined into a single summary that is rendered on the page.
"""
import streamlit as st
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
import torch
from PyPDF2 import PdfReader

st.subheader("File Summarization Tool")

# The pipeline takes a CUDA device index, or -1 to run on the CPU.
device = 0 if torch.cuda.is_available() else -1

# Load the summarization pipeline (Hugging Face model). A load failure is
# reported in the UI and leaves `summarizer` as None so the rest of the page
# still renders instead of crashing.
# NOTE(review): this runs at module top level, so the model is presumably
# reloaded whenever Streamlit re-executes the script; consider wrapping the
# load in Streamlit's resource cache if the deployed version provides it.
try:
    summarizer = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
        device=device,
    )
    # Alternative model:
    # summarizer = pipeline("summarization", model="facebook/bart-large", device=device)
except Exception as e:
    st.error(f"Error loading model: {str(e)}")
    summarizer = None


def extract_text_from_pdf(pdf_file):
    """Return the text of every page of *pdf_file*, one page per line group.

    Args:
        pdf_file: A path or binary file-like object accepted by ``PdfReader``.

    Returns:
        The extracted page texts joined with newlines. Pages that yield no
        text contribute an empty string.
    """
    reader = PdfReader(pdf_file)
    # `or ""` guards against a page yielding None instead of a string, which
    # would otherwise make the concatenation raise TypeError. The newline
    # separator keeps the last word of one page from fusing with the first
    # word of the next before whitespace-based chunking.
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def extract_text_from_txt(txt_file):
    """Return the contents of *txt_file* decoded as UTF-8.

    Args:
        txt_file: A binary file-like object whose ``read()`` returns bytes.

    Returns:
        The decoded text. Bytes that are not valid UTF-8 are replaced with
        U+FFFD rather than aborting the whole summarization.
    """
    return txt_file.read().decode("utf-8", errors="replace")


def split_text_into_chunks(text, chunk_size=512):
    """Yield consecutive chunks of *text*, each at most *chunk_size* words.

    Words are delimited by arbitrary whitespace and re-joined with single
    spaces. Empty or whitespace-only text yields nothing.
    """
    words = text.split()
    for start in range(0, len(words), chunk_size):
        yield " ".join(words[start:start + chunk_size])


# Streamlit file uploader
file = st.file_uploader("Upload a PDF or TXT file", type=["pdf", "txt"])

# Text input area for user-provided text
user_text = st.text_area("Or write your text here:", "")

if (file is not None or user_text) and summarizer is not None:
    try:
        # Extract text based on input type; an uploaded file takes
        # precedence over text typed into the text area.
        if file is not None:
            if file.type == "application/pdf":
                text = extract_text_from_pdf(file)
            elif file.type == "text/plain":
                text = extract_text_from_txt(file)
            else:
                st.error("Unsupported file type.")
                text = ""
        else:
            text = user_text

        # strip() so whitespace-only input gets the warning below instead of
        # an empty "Summary" section (it would produce zero chunks).
        if text.strip():
            summaries = []
            # Summarize each chunk independently.
            for chunk in split_text_into_chunks(text):
                # truncation=True keeps a chunk whose tokenization exceeds
                # the model's input limit from raising inside the model.
                summarized_chunk = summarizer(
                    chunk,
                    max_length=130,
                    min_length=30,
                    do_sample=False,
                    truncation=True,
                )
                summaries.append(summarized_chunk[0]["summary_text"])

            # Combine summaries from all chunks
            summary = " ".join(summaries)

            # Display the summary
            st.subheader("Summary")
            st.write(summary)
        else:
            st.warning("No text could be extracted from the file or provided by the user.")
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user rather than
        # letting the page crash.
        st.error(f"An error occurred during summarization: {str(e)}")