shaimaa1 committed on
Commit
57eb1ae
·
verified ·
1 Parent(s): 5087b52

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
3
+ import torch
4
+ from PyPDF2 import PdfReader
5
+
6
# Page heading for the app.
st.subheader("File Summarization Tool")

# The transformers pipeline takes a CUDA device index, or -1 to run on CPU.
device = 0 if torch.cuda.is_available() else -1


@st.cache_resource
def _load_summarizer(device_index):
    """Build the summarization pipeline.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline as a resource avoids reloading the model on each rerun.

    Args:
        device_index: Device argument forwarded to ``pipeline`` (CUDA index,
            or -1 for CPU).

    Returns:
        The Hugging Face summarization pipeline.
    """
    return pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
        device=device_index,
    )


# Load the summarization pipeline (Hugging Face model). On failure the error
# is shown in the page and `summarizer` is None so later code can skip work.
try:
    summarizer = _load_summarizer(device)
except Exception as e:
    st.error(f"Error loading model: {str(e)}")
    summarizer = None
19
+
20
+
21
+ # Function to extract text from a PDF file
22
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_file: The PDF source handed to ``PdfReader`` (in this app, the
            uploaded file object).

    Returns:
        The text extracted from each page, joined with newlines. A page that
        yields no text contributes an empty string.
    """
    reader = PdfReader(pdf_file)
    # `or ""` guards against a page that yields nothing. Joining with a
    # newline keeps the last word of one page from being glued to the first
    # word of the next page.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
29
+
30
+
31
+ # Function to extract text from a TXT file
32
def extract_text_from_txt(txt_file):
    """Read a binary file-like object and return its contents as text.

    Args:
        txt_file: Object whose ``read()`` returns the file's raw bytes.

    Returns:
        The decoded text, without a leading UTF-8 byte-order mark.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    # "utf-8-sig" decodes plain UTF-8 exactly like "utf-8" but also strips a
    # leading BOM, which some editors write and which would otherwise show up
    # as a stray U+FEFF character at the start of the text.
    return txt_file.read().decode("utf-8-sig")
34
+
35
+
36
def split_text_into_chunks(text, chunk_size=512):
    """Yield consecutive pieces of *text* of at most *chunk_size* words each.

    Words are whatever ``str.split()`` produces, so any run of whitespace is
    a separator and each chunk is re-joined with single spaces.
    """
    words = text.split()
    for start in range(0, len(words), chunk_size):
        yield " ".join(words[start:start + chunk_size])


# Streamlit file uploader
file = st.file_uploader("Upload a PDF or TXT file", type=["pdf", "txt"])

# Text input area for user-provided text
user_text = st.text_area("Or write your text here:", "")

# Only run when there is some input and the model loaded successfully.
if (file or user_text) and summarizer:
    try:
        # An uploaded file takes precedence over the text area.
        if file:
            if file.type == "application/pdf":
                text = extract_text_from_pdf(file)
            elif file.type == "text/plain":
                text = extract_text_from_txt(file)
            else:
                st.error("Unsupported file type.")
                text = ""
        else:
            text = user_text

        # Whitespace-only input would produce no chunks and an empty
        # "Summary", so treat it the same as no text at all.
        if text.strip():
            # Summarize each chunk independently. truncation=True keeps a
            # chunk that tokenizes past the model's input limit from failing.
            summaries = [
                summarizer(
                    chunk,
                    max_length=130,
                    min_length=30,
                    do_sample=False,
                    truncation=True,
                )[0]["summary_text"]
                for chunk in split_text_into_chunks(text)
            ]

            # Combine the per-chunk summaries and display the result.
            st.subheader("Summary")
            st.write(" ".join(summaries))
        else:
            st.warning("No text could be extracted from the file or provided by the user.")

    except Exception as e:
        # Top-level UI boundary: report the failure in the page, don't crash.
        st.error(f"An error occurred during summarization: {str(e)}")