Spaces:

ccapo
/

portfolio

Running

portfolio / app.py

Christopher Capobianco

Get document classifier to load properly

fc8e190 22 days ago

2.28 kB

	import streamlit as st
	import spacy
	import pickle
	import subprocess

	# Browser-tab title, icon and layout for the whole app
	st.set_page_config(
	    page_title="Chris Capobianco's Profile",
	    page_icon=':rocket:',
	    layout='wide',
	)

	# Home page, flagged as the default page of the app
	home = st.Page('Home.py', title='Home', default=True)

	# Function to Load the Spacy tokenizer
	@st.cache_resource
	def load_nlp():
	    """Return the spaCy ``en_core_web_sm`` pipeline, downloading it on demand.

	    The model is loaded directly when it is already installed; the download
	    is only attempted when that first load fails. The result is cached by
	    ``st.cache_resource``.

	    Returns:
	        The object returned by ``spacy.load('en_core_web_sm')``.

	    Raises:
	        OSError: If the model still cannot be loaded after the download
	            attempt (same failure mode as loading a missing model).
	    """
	    import importlib
	    import sys

	    model_name = 'en_core_web_sm'
	    try:
	        # Fast path: no subprocess / network access when the model exists
	        return spacy.load(model_name)
	    except OSError:
	        # spaCy reports a missing model package as OSError; fall through
	        # to the download below.
	        pass

	    # Use the running interpreter so the model is installed into the same
	    # environment this app imports spaCy from (a bare "python" may resolve
	    # to a different interpreter on PATH).
	    # check=False: a failed download surfaces as OSError from the load below.
	    subprocess.run(
	        [sys.executable, "-m", "spacy", "download", model_name],
	        check=False,
	    )

	    # The package was installed while this process was running; refresh the
	    # import system's caches so it can be found.
	    importlib.invalidate_caches()
	    return spacy.load(model_name)

	def tokenizer(sentence):
	    """Turn *sentence* into a list of normalised tokens.

	    Each token produced by ``nlp(sentence)`` is reduced to its lower-cased,
	    stripped lemma, then dropped if it appears in ``stopwords`` or
	    ``punctuations``. ``nlp``, ``stopwords`` and ``punctuations`` are read
	    from module scope and must be bound before this function is called.

	    Returns:
	        list: The surviving token strings, in document order.
	    """
	    kept = []
	    for tok in nlp(sentence):
	        # A '-PRON-' lemma is the placeholder for pronouns ('I', 'my',
	        # 'me'); keep the lower-cased surface text for those instead.
	        # NOTE(review): '-PRON-' looks like the spaCy v2 convention --
	        # confirm whether the installed spaCy version still emits it.
	        if tok.lemma_ == "-PRON-":
	            form = tok.lower_
	        else:
	            form = tok.lemma_.lower().strip()

	        # Skip stop words and punctuation
	        if form in stopwords or form in punctuations:
	            continue
	        kept.append(form)

	    return kept

	# Function to Load the model
	@st.cache_resource
	def load_tokenizer_model():
	    """Read the three pickled objects stored in ``./models/autoclassifier.pkl``.

	    The file holds three consecutive pickles, read in this order: the stop
	    words, the punctuation marks, and the model pipeline. The result is
	    cached by ``st.cache_resource``.

	    Returns:
	        tuple: ``(stopwords, punctuations, model_pipe)``.
	    """
	    with open('./models/autoclassifier.pkl', 'rb') as handle:
	        # Order matters: each load consumes the next pickle in the file
	        stop_words, punct_marks, pipe = (pickle.load(handle) for _ in range(3))
	    return (stop_words, punct_marks, pipe)

	# Project pages (the weather classification page is currently disabled)
	document_classification = st.Page('projects/01_Document_Classifier.py', title='Document Classifier')
	movie_recommendation = st.Page('projects/02_Movie_Recommendation.py', title='Movie Recommendation')
	# weather_classification = st.Page('projects/04_Weather_Classification.py', title='Weather Classification')
	stock_market = st.Page('projects/05_Stock_Market.py', title='Stock Market Forecast')
	generative_music = st.Page('projects/06_Generative_Music.py', title='Generative Music')
	llm_fine_tune = st.Page('projects/07_LLM_Fine_Tuned.py', title='Fine Tuned LLM')

	# Navigation menu, grouped by section
	pg = st.navigation(
	    {
	        'Home': [
	            home,
	        ],
	        'Projects': [
	            document_classification,
	            movie_recommendation,
	            # weather_classification,
	            stock_market,
	            generative_music,
	            llm_fine_tune,
	        ],
	    }
	)

	# Bind the tokenizer's resources BEFORE running the selected page:
	# tokenizer() reads nlp, stopwords and punctuations as module globals, so
	# they must exist by the time a page can trigger a call to it. Both
	# loaders are wrapped in st.cache_resource.

	# Load the Spacy tokenizer
	nlp = load_nlp()

	# Load the Model
	stopwords, punctuations, model_pipe = load_tokenizer_model()

	# Execute the page chosen in the navigation menu
	pg.run()