# sami606713's picture
# Update app.py
# b6ffb9a verified
import streamlit as st
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSeq2SeqLM,AutoTokenizer,AutoTokenizer
import re
# Text preprocessing function
def process_text(text):
    """Normalize raw report text before it is tokenized.

    Every run of characters other than ASCII letters, digits and full stops
    (whitespace included) becomes a single space, repeated full stops are
    collapsed to one, and leading/trailing spaces are removed.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text; an empty string if nothing usable remains.
    """
    # Whitespace is outside the allowed character class, so this single pass
    # both drops special characters and collapses whitespace runs.
    text = re.sub(r"[^a-zA-Z0-9.]+", " ", text)
    # Remove multiple full stops
    text = re.sub(r"\.{2,}", ".", text)
    # Trim last: a special character at either end of the input has just been
    # turned into a space, which trimming before the substitution would miss.
    return text.strip()
# Load model
@st.cache_resource
def load_model():
    """Load the base BART model and apply the fine-tuned PEFT adapter.

    The function is wrapped in ``st.cache_resource`` so the weights are loaded
    once and reused, instead of being rebuilt on every button click. The
    cached object is shared between callers and should be used for inference
    only.

    Returns:
        The ``PeftModel`` built from ``facebook/bart-large-cnn`` and the
        ``sami606713/medical_report_summarizer`` adapter.
    """
    # The previous PeftConfig.from_pretrained(...) call was removed: its result
    # was never used, so it only added an extra lookup on each load.
    base_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
    return PeftModel.from_pretrained(base_model, "sami606713/medical_report_summarizer")
# Load tokenizer
@st.cache_resource
def load_tokenizer():
    """Load the tokenizer published with the summarizer adapter.

    Wrapped in ``st.cache_resource`` so the tokenizer is created once and
    reused instead of being re-instantiated on every summarization request.

    Returns:
        The tokenizer loaded from ``sami606713/medical_report_summarizer``.
    """
    return AutoTokenizer.from_pretrained("sami606713/medical_report_summarizer")
# Summarize text
def summerize_text(text):
    """Generate a summary for a medical report.

    The text is cleaned with ``process_text``, tokenized (truncated to at most
    1000 tokens) and passed to the model's ``generate`` method.

    Args:
        text: Raw report text.

    Returns:
        The decoded summary string, or an empty string when no usable text is
        left after cleaning.
    """
    cleaned = process_text(text)
    # Without any real tokens the model would generate from special tokens
    # alone, which yields a summary unrelated to any input.
    if not cleaned.strip():
        return ""

    tokenizer = load_tokenizer()
    # A single sequence needs no padding; padding it to max_length would make
    # the encoder process 1000 positions regardless of the report's length.
    inputs = tokenizer(cleaned, max_length=1000, truncation=True, return_tensors='pt')

    model = load_model()
    summary_ids = model.generate(**inputs, max_length=300)
    # Only one sequence was submitted, so the first row is the summary.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# Browser-tab title and icon, with the content using the full page width
st.set_page_config(
    page_title="Medical Report Summerization",
    page_icon="🩺",
    layout="wide",
)

# Heading displayed at the top of the app
st.title("Medical Report Summerization")
# Add a pdf file uploader
# uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
# if uploaded_file is not None:
# st.write(uploaded_file)
# Free-text input for the report to be summarized
text = st.text_area("Enter report here:\n", height=250)

if st.button("Summarize"):
    if not text.strip():
        # Skip the model call entirely when there is nothing to summarize.
        st.warning("Please enter a report before summarizing.")
    else:
        # Show a spinner while the summary is being generated
        with st.spinner("Summarizing..."):
            result = summerize_text(text)
        if result:
            # Render the summary in italics
            st.write(f"*{result}*")
        else:
            st.warning("No summary could be generated for this input.")