# portfolio/app.py
import streamlit as st
import spacy
import pickle
import subprocess
# Page configuration: title, icon and wide layout
st.set_page_config(page_title="Chris Capobianco's Profile", page_icon=':rocket:', layout='wide')

# Home page, shown by default
home = st.Page('Home.py', title='Home', default=True)
# Function to Load the Spacy tokenizer
@st.cache_resource
def load_nlp():
    # Download the small English model at startup, then load it
    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
    return spacy.load('en_core_web_sm')
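
# Custom tokenizer (presumably referenced by the pickled pipeline, which is why it
# is defined in this module so pickle can resolve it when the model is deserialized)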
def tokenizer(sentence):
    # Process the text
    doc = nlp(sentence)
    # Convert tokens to lemma form for all except '-PRON-'
    # Recall: Tokens like 'I', 'my', 'me' are represented as '-PRON-' by lemma attribute (See SpaCy Introduction)
    tokens = [ token.lemma_.lower().strip() if token.lemma_ != "-PRON-" else token.lower_ for token in doc ]
    # Remove stop words and punctuations
    tokens = [ token for token in tokens if token not in stopwords and token not in punctuations ]
    return tokens
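
# Illustrative example (exact output depends on the pickled stop word list):
#   tokenizer("The cats are running!")  ->  ['cat', 'run']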
# Function to Load the model
@st.cache_resource
def load_tokenizer_model():
    with open('./models/autoclassifier.pkl', 'rb') as model_file:
        stopwords = pickle.load(model_file)
        punctuations = pickle.load(model_file)
        model_pipe = pickle.load(model_file)
    return (stopwords, punctuations, model_pipe)
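
# Project pages shown in the sidebar (the weather classification page is currently disabled)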
document_classification = st.Page('projects/01_Document_Classifier.py', title='Document Classifier')
movie_recommendation = st.Page('projects/02_Movie_Recommendation.py', title='Movie Recommendation')
# weather_classification = st.Page('projects/04_Weather_Classification.py', title='Weather Classification')
stock_market = st.Page('projects/05_Stock_Market.py', title='Stock Market Forecast')
generative_music = st.Page('projects/06_Generative_Music.py', title='Generative Music')
llm_fine_tune = st.Page('projects/07_LLM_Fine_Tuned.py', title='Fine Tuned LLM')
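
# Group the pages into 'Home' and 'Projects' sections of the sidebar navigation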
pg = st.navigation(
    {
        'Home': [
            home
        ],
        'Projects': [
            document_classification,
            movie_recommendation,
            # weather_classification,
            stock_market,
            generative_music,
            llm_fine_tune
        ]
    }
)

# Load the Spacy tokenizer
nlp = load_nlp()
# Load the Model
stopwords, punctuations, model_pipe = load_tokenizer_model()

# Run the selected page; loading above ensures the globals used by tokenizer()
# (nlp, stopwords, punctuations) are defined before any page invokes the classifier
pg.run()