File size: 2,769 Bytes
7bb7c6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import streamlit as st
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
import torch
from PyPDF2 import PdfReader

# --- File Summarization Tool: page header and model bootstrap ---
st.subheader("File Summarization Tool")

# Pipeline device index: 0 = first CUDA GPU, -1 = CPU.
device = 0 if torch.cuda.is_available() else -1

# Load the summarization model eagerly so a broken environment is
# reported in the UI immediately instead of at first use.
# Alternative model considered: "facebook/bart-large" (heavier).
summarizer = None
try:
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
except Exception as e:
    st.error(f"Error loading model: {str(e)}")


# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Extract and concatenate the text of every page of a PDF.

    Parameters
    ----------
    pdf_file : file-like object
        Binary stream containing PDF data (e.g. a Streamlit UploadedFile).

    Returns
    -------
    str
        Text of all pages joined together. Pages with no extractable
        text (e.g. image-only pages) contribute nothing.
    """
    reader = PdfReader(pdf_file)
    parts = []
    for page in reader.pages:
        # extract_text() may return None/empty for pages without a text
        # layer; guard so concatenation never raises TypeError.
        page_text = page.extract_text()
        if page_text:
            parts.append(page_text)
    # join once instead of quadratic `text += ...` accumulation
    return "".join(parts)


# Function to extract text from a TXT file
def extract_text_from_txt(txt_file):
    """Return the contents of an uploaded text file decoded as UTF-8."""
    raw_bytes = txt_file.read()
    return raw_bytes.decode("utf-8")


def _split_text_into_chunks(text, chunk_size=512):
    """Yield successive chunks of roughly ``chunk_size`` words.

    The summarization model accepts a limited input length, so long
    documents are summarized chunk-by-chunk and the partial summaries
    are concatenated afterwards. Hoisted to module level so it is not
    re-created inside the request path on every Streamlit rerun.
    """
    words = text.split()
    for start in range(0, len(words), chunk_size):
        yield " ".join(words[start:start + chunk_size])


# Streamlit file uploader
file = st.file_uploader("Upload a PDF or TXT file", type=["pdf", "txt"])

# Text input area for user-provided text
user_text = st.text_area("Or write your text here:", "")

if (file or user_text) and summarizer:
    try:
        # Extract text based on input type; an uploaded file wins over
        # the free-text box when both are present.
        if file:
            if file.type == "application/pdf":
                text = extract_text_from_pdf(file)
            elif file.type == "text/plain":
                text = extract_text_from_txt(file)
            else:
                st.error("Unsupported file type.")
                text = ""
        else:
            text = user_text

        if text:
            # Summarize each chunk; iterate the generator directly
            # instead of materializing a throwaway list first.
            summaries = []
            for chunk in _split_text_into_chunks(text):
                summarized_chunk = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
                summaries.append(summarized_chunk[0]['summary_text'])

            # Combine summaries from all chunks and display
            st.subheader("Summary")
            st.write(" ".join(summaries))
        else:
            st.warning("No text could be extracted from the file or provided by the user.")

    except Exception as e:
        st.error(f"An error occurred during summarization: {str(e)}")