import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import pipeline

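# Note: PyPDF2's development has moved to its successor package, pypdf; if pypdf is
# installed instead, the equivalent import would be `from pypdf import PdfReader`
# (an alternative, not required for this script).
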
st.subheader("File Summarization Tool")

# Use the GPU (device 0) if one is available; otherwise run on the CPU.
device = 0 if torch.cuda.is_available() else -1

# Build the summarization pipeline; if loading fails, report the error and disable summarization.
try:
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
except Exception as e:
    st.error(f"Error loading model: {e}")
    summarizer = None

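
# Note: Streamlit re-runs this script on every widget interaction, so the pipeline
# above is rebuilt each time. One common way to avoid that (a sketch, assuming a
# Streamlit version that provides st.cache_resource) is to wrap the load in a
# cached function:
#
#     @st.cache_resource
#     def load_summarizer():
#         return pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
#
#     summarizer = load_summarizer()
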
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in the uploaded PDF."""
    reader = PdfReader(pdf_file)
    text = ""
    for page in reader.pages:
        # extract_text() can return None for pages without extractable text.
        text += page.extract_text() or ""
    return text


def extract_text_from_txt(txt_file):
    """Decode an uploaded plain-text file as UTF-8."""
    return txt_file.read().decode("utf-8")

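# st.file_uploader returns an UploadedFile, a binary file-like object, which is why
# the helpers above can pass it straight to PdfReader or call .read() on it.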
file = st.file_uploader("Upload a PDF or TXT file", type=["pdf", "txt"])
user_text = st.text_area("Or write your text here:", "")

if (file or user_text) and summarizer:
    try:
        # Prefer the uploaded file; fall back to the text box if no file was given.
        if file:
            if file.type == "application/pdf":
                text = extract_text_from_pdf(file)
            elif file.type == "text/plain":
                text = extract_text_from_txt(file)
            else:
                st.error("Unsupported file type.")
                text = ""
        else:
            text = user_text

        if len(text) > 0:
            # Break long inputs into ~512-word chunks and summarize each chunk separately.
            def split_text_into_chunks(text, chunk_size=512):
                words = text.split()
                for i in range(0, len(words), chunk_size):
                    yield " ".join(words[i:i + chunk_size])

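            # Note: chunk_size is counted in words, which is only a rough proxy for the
            # model's input limit (roughly 1,024 tokens for this checkpoint); unusually
            # dense chunks may still exceed what the model can read in full.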
            chunks = list(split_text_into_chunks(text))
            summaries = []

            for chunk in chunks:
                # max_length/min_length are token counts for the generated summary;
                # do_sample=False keeps the output deterministic.
                summarized_chunk = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
                summaries.append(summarized_chunk[0]["summary_text"])

            # Stitch the per-chunk summaries back into a single summary.
            summary = " ".join(summaries)

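            # Optional refinement (not in the original flow): running `summarizer` once
            # more over `summary` can tighten the result for very long inputs.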
            st.subheader("Summary")
            st.write(summary)
        else:
            st.warning("No text could be extracted from the file or provided by the user.")

    except Exception as e:
        st.error(f"An error occurred during summarization: {e}")
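
# To try the app locally (assuming this file is saved as app.py and that streamlit,
# torch, transformers, and PyPDF2 are installed):
#
#     streamlit run app.py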