import streamlit as st
import spacy
import pickle
import subprocess

# Page configuration
st.set_page_config(page_title="Chris Capobianco's Profile", page_icon=':rocket:', layout='wide')

home = st.Page('Home.py', title='Home', default=True)

# Function to load the spaCy model
@st.cache_resource
def load_nlp():
    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
    return spacy.load('en_core_web_sm')

def tokenizer(sentence):
    # Process the text
    doc = nlp(sentence)

    # Convert tokens to lemma form for all except '-PRON-'
    # Recall: tokens like 'I', 'my', 'me' are represented as '-PRON-' by the lemma attribute (see the SpaCy Introduction)
    tokens = [
        token.lemma_.lower().strip() if token.lemma_ != "-PRON-" else token.lower_
        for token in doc
    ]

    # Remove stop words and punctuation
    tokens = [
        token for token in tokens
        if token not in stopwords and token not in punctuations
    ]

    return tokens

# Function to load the pickled stop words, punctuation and model pipeline
@st.cache_resource
def load_tokenizer_model():
    with open('./models/autoclassifier.pkl', 'rb') as model_file:
        stopwords = pickle.load(model_file)
        punctuations = pickle.load(model_file)
        model_pipe = pickle.load(model_file)
    return (stopwords, punctuations, model_pipe)

document_classification = st.Page('projects/01_Document_Classifier.py', title='Document Classifier')
movie_recommendation = st.Page('projects/02_Movie_Recommendation.py', title='Movie Recommendation')
# weather_classification = st.Page('projects/04_Weather_Classification.py', title='Weather Classification')
stock_market = st.Page('projects/05_Stock_Market.py', title='Stock Market Forecast')
generative_music = st.Page('projects/06_Generative_Music.py', title='Generative Music')
llm_fine_tune = st.Page('projects/07_LLM_Fine_Tuned.py', title='Fine Tuned LLM')

pg = st.navigation(
    {
        'Home': [
            home
        ],
        'Projects': [
            document_classification,
            movie_recommendation,
            # weather_classification,
            stock_market,
            generative_music,
            llm_fine_tune
        ]
    }
)

# Load the spaCy tokenizer
nlp = load_nlp()

# Load the model
# Note: these module-level names must be bound before pg.run() executes a project page,
# since tokenizer() looks them up when the unpickled pipeline is used
stopwords, punctuations, model_pipe = load_tokenizer_model()

pg.run()
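
# --- Illustrative sketch (not called anywhere) --------------------------------
# load_tokenizer_model() unpickles three objects in a fixed order, so the file
# ./models/autoclassifier.pkl is assumed to have been written in that same order
# at training time. The helper below only documents that assumption; its name
# and arguments are hypothetical, not part of the app.
def _dump_autoclassifier_sketch(stopwords, punctuations, model_pipe,
                                path='./models/autoclassifier.pkl'):
    with open(path, 'wb') as model_file:
        pickle.dump(stopwords, model_file)     # stop words removed by tokenizer()
        pickle.dump(punctuations, model_file)  # punctuation removed by tokenizer()
        pickle.dump(model_pipe, model_file)    # scikit-learn pipeline that calls tokenizer()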