"""Streamlit app that summarizes a medical report with a PEFT-adapted BART model."""

import re

import streamlit as st
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Hugging Face identifiers used by the loaders below.
ADAPTER_REPO = "sami606713/medical_report_summarizer"
BASE_MODEL = "facebook/bart-large-cnn"


# Process text fun
def process_text(text: str) -> str:
    """Normalize raw report text before tokenization.

    Args:
        text: The raw text entered by the user.

    Returns:
        The text with whitespace runs collapsed, every run of characters
        other than ASCII letters, digits and ``.`` replaced by one space,
        repeated full stops collapsed to one, and no leading or trailing
        whitespace. May be an empty string.
    """
    # Remove extra white space from the text
    text = " ".join(text.split())

    # Remove special characters
    text = re.sub(r"[^a-zA-Z0-9.]+", " ", text)

    # Remove multiple full stops
    text = re.sub(r"\.{2,}", ".", text)

    # The special-character substitution can leave a space at either end
    # (e.g. "hello!" -> "hello "), so trim the final result.
    return text.strip()


# Load model
@st.cache_resource(show_spinner=False)
def load_model() -> PeftModel:
    """Load the base seq2seq model and wrap it with the PEFT adapter.

    The result is cached with ``st.cache_resource`` so the weights are
    loaded once and reused, instead of being reloaded on every button press.

    Returns:
        The ``PeftModel`` built from ``BASE_MODEL`` and ``ADAPTER_REPO``.
    """
    base_model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL)
    return PeftModel.from_pretrained(base_model, ADAPTER_REPO)


# Load tokenizer
@st.cache_resource(show_spinner=False)
def load_tokenizer():
    """Load the tokenizer published alongside the adapter.

    Cached with ``st.cache_resource`` for the same reason as ``load_model``.

    Returns:
        The tokenizer returned by ``AutoTokenizer.from_pretrained``.
    """
    return AutoTokenizer.from_pretrained(ADAPTER_REPO)


# Summerize text
def summerize_text(text: str) -> str:
    """Generate a summary for a medical report.

    Args:
        text: The raw report text.

    Returns:
        The decoded summary (special tokens skipped), or an empty string
        when nothing is left of ``text`` after cleaning.
    """
    # process the text
    text = process_text(text)
    if not text:
        # Nothing meaningful to summarize; skip the model call entirely.
        return ""

    # tokenize the text (truncated and padded to 1000 tokens)
    tokenizer = load_tokenizer()
    inputs = tokenizer(
        text,
        max_length=1000,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
    )

    # Generate the output idx
    model = load_model()
    summary_ids = model.generate(**inputs, max_length=300)

    # Only one sequence was submitted, so decode the first result.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


# set the page layout
st.set_page_config(page_title="Medical Report Summerization", page_icon="🩺", layout="wide")

# Set the title of the app
st.title('Medical Report Summerization')

# Add a pdf file uploader
# uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
# if uploaded_file is not None:
#     st.write(uploaded_file)

text = st.text_area("Enter report here:\n", height=250)

if st.button("Summarize"):
    if not process_text(text):
        # Empty or symbols-only input: do not run the model on it.
        st.warning("Please enter a report to summarize.")
    else:
        # Show a spinner while the model is generating.
        with st.spinner("Summarizing..."):
            result = summerize_text(text)
        st.write(f"*{result}*")