import pickle

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.impute import KNNImputer
from sklearn.preprocessing import (
    LabelEncoder,
    OneHotEncoder,
    RobustScaler,
    StandardScaler,
)
# Load your saved model
# model = joblib.load("ann_model.joblib")
# Define the prediction function
def predict(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country) -> str:
    """Preprocess one set of form inputs and return the result as text.

    The inputs are packed into a single-row DataFrame, passed through
    ``cleaning_features`` and rendered as a string so the Gradio "text"
    output has something to display.

    Args:
        age, capital_gain, capital_loss, hours_per_week: numeric form values.
        workclass, education, marital_status, occupation, relationship,
        race, gender, native_country: category labels chosen in the form.
            ``education`` is the education *label* (e.g. "Bachelors"); it is
            stored in the "educational-num" column and converted to its
            numeric level by ``cleaning_features``.

    Returns:
        The preprocessed single-row DataFrame rendered with ``to_string()``.
    """
    columns = {
        "age": [age],
        "workclass": [workclass],
        "educational-num": [education],
        "marital-status": [marital_status],
        "occupation": [occupation],
        "relationship": [relationship],
        "race": [race],
        "gender": [gender],
        "capital-gain": [capital_gain],
        "capital-loss": [capital_loss],
        "hours-per-week": [hours_per_week],
        "native-country": [native_country],
    }
    df = pd.DataFrame(data=columns)
    fixed_features = cleaning_features(df)
    # TODO: model inference is not wired in yet. Once the trained model is
    # loaded, feed it `fixed_features` and return "Income >50K" when the
    # prediction is 1, otherwise "Income <=50K".
    # Return the text itself: print() returns None, which would leave the
    # UI output empty.
    return fixed_features.to_string()
def cleaning_features(data):
    """Encode and scale a raw feature frame without modifying the input.

    Steps applied to a copy of ``data``:
      * 'workclass' and 'occupation' are transformed with the objects loaded
        from 'label_encoder_work.pkl' and 'label_encoder_occ.pkl'.
      * 'gender' and 'native-country' are mapped to 0/1.
      * 'educational-num' (holding an education label) is mapped to its
        numeric level 1-16.
      * 'age', 'educational-num' and 'hours-per-week' are transformed with
        the object loaded from 'scaler.pkl'.

    'race', 'marital-status', 'relationship', 'capital-gain' and
    'capital-loss' are passed through unchanged.

    Args:
        data: DataFrame containing the columns listed above.

    Returns:
        A new DataFrame with the encoded/scaled columns.

    Raises:
        FileNotFoundError: if one of the pickle files is missing.
    """
    # NOTE(security): pickle.load can execute arbitrary code; these files
    # must only ever come from a trusted source.
    # Presumably these hold fitted sklearn encoders/scaler - confirm against
    # the training code.
    with open('label_encoder_work.pkl', 'rb') as le_file:
        le_work = pickle.load(le_file)
    with open('label_encoder_occ.pkl', 'rb') as le_file:
        le_occ = pickle.load(le_file)
    with open('scaler.pkl', 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)

    education_num_mapping = {
        "Preschool": 1,
        "1st-4th": 2,
        "5th-6th": 3,
        "7th-8th": 4,
        "9th": 5,
        "10th": 6,
        "11th": 7,
        "12th": 8,
        "HS-grad": 9,
        "Some-college": 10,
        "Assoc-voc": 11,
        "Assoc-acdm": 12,
        "Bachelors": 13,
        "Masters": 14,
        "Doctorate": 15,
        "Prof-school": 16,
    }
    gender_mapping = {"Male": 1, "Female": 0}
    country_mapping = {"United-States": 1, "Other": 0}
    numeric_cols = ['age', 'educational-num', 'hours-per-week']

    # Work on a copy so the caller's DataFrame is left untouched.
    cleaned = data.copy()
    cleaned['workclass'] = le_work.transform(cleaned['workclass'])
    cleaned['occupation'] = le_occ.transform(cleaned['occupation'])
    # NOTE: Series.map yields NaN for any value missing from the mapping.
    cleaned['gender'] = cleaned['gender'].map(gender_mapping)
    cleaned['native-country'] = cleaned['native-country'].map(country_mapping)
    cleaned['educational-num'] = cleaned['educational-num'].map(education_num_mapping)
    cleaned[numeric_cols] = scaler.transform(cleaned[numeric_cols])
    return cleaned
# Earlier draft of pca(), kept for reference: it re-fit the encoder and the
# PCA on the incoming data instead of loading the saved, already-fitted ones.
# def pca(data):
#     encoder = OneHotEncoder(sparse_output=False)
#     one_hot_encoded = encoder.fit_transform(data[['workclass', 'occupation']])
#     encoded_columns_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out())
#     pca_net = PCA(n_components=10)
#     pca_result_net = pca_net.fit_transform(encoded_columns_df)
#     pca_columns = [f'pca_component_{i+1}' for i in range(10)]
#     pca_df = pd.DataFrame(pca_result_net, columns=pca_columns)
#     data = data.drop(columns=['workclass', 'occupation'], axis=1)  # remove the original columns
#     data = pd.concat([data, pca_df], axis=1)
#     return data
def pca(data):
    """Replace 'workclass' and 'occupation' with saved PCA components.

    The two columns are one-hot encoded with the encoder loaded from
    'onehot_encoder.pkl', projected with the model loaded from 'pca.pkl',
    and the resulting 'pca_component_<i>' columns are appended in place of
    the original two columns.

    Args:
        data: DataFrame containing 'workclass' and 'occupation' columns.

    Returns:
        A new DataFrame without 'workclass'/'occupation' and with one
        'pca_component_<i>' column per PCA component, on the same index as
        ``data``.

    Raises:
        FileNotFoundError: if one of the pickle files is missing.
    """
    encoder_pkl = 'onehot_encoder.pkl'
    pca_model_pkl = 'pca.pkl'
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # trusted files.
    with open(pca_model_pkl, 'rb') as file:
        pca_model = pickle.load(file)
    with open(encoder_pkl, 'rb') as file:
        encoder = pickle.load(file)

    # Assumes the saved encoder produces dense output - TODO confirm.
    one_hot_encoded = encoder.transform(data[['workclass', 'occupation']])
    encoded_columns_df = pd.DataFrame(
        one_hot_encoded, columns=encoder.get_feature_names_out()
    )
    pca_result_net = pca_model.transform(encoded_columns_df)
    pca_columns = [
        f'pca_component_{i+1}' for i in range(pca_model.n_components_)
    ]
    # Reuse the caller's index: concat(axis=1) aligns on index labels, so a
    # fresh RangeIndex would produce NaN-padded rows whenever `data` is not
    # indexed 0..n-1.
    pca_df = pd.DataFrame(pca_result_net, columns=pca_columns, index=data.index)
    remaining = data.drop(columns=['workclass', 'occupation'])
    return pd.concat([remaining, pca_df], axis=1)
def hbdscan_tranform(df_transformed):
    """Log-transform the capital columns and robust-scale the numeric ones.

    'capital-gain' and 'capital-loss' are passed through ``np.log1p``; then
    'age', 'capital-gain', 'capital-loss' and 'hours-per-week' are scaled
    with a ``RobustScaler`` fitted on the given frame.

    Args:
        df_transformed: DataFrame containing the four numeric columns above.

    Returns:
        A new DataFrame with the transformed columns; the input is not
        modified.
    """
    # Copy first so the caller's frame is not altered as a side effect.
    result = df_transformed.copy()
    result['capital-gain'] = np.log1p(result['capital-gain'])
    result['capital-loss'] = np.log1p(result['capital-loss'])

    # Apply RobustScaler to all numerical features.
    numerical_features = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']
    # NOTE(review): the scaler is fitted on the data it transforms; if this
    # is ever called on inference rows, a scaler fitted on the training data
    # should be loaded instead - confirm intended use.
    scaler = RobustScaler()
    result[numerical_features] = scaler.fit_transform(result[numerical_features])
    return result
# Create the Gradio interface.
# Gradio passes the input components to `fn` positionally, so their order
# below must match predict()'s parameter order exactly:
# age, workclass, education, marital_status, occupation, relationship, race,
# gender, capital_gain, capital_loss, hours_per_week, native_country.
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Slider(18, 90, step=1, label="Age"),
        gr.Dropdown(
            ["Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov",
             "Local-gov", "State-gov", "Without-pay", "Never-worked"],
            label="Workclass"
        ),
        gr.Dropdown(
            ["Bachelors", "Some-college", "11th", "HS-grad", "Prof-school",
             "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters",
             "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
            label="Education"
        ),
        gr.Dropdown(
            ["Married-civ-spouse", "Divorced", "Never-married", "Separated",
             "Widowed", "Married-spouse-absent", "Married-AF-spouse"],
            label="Marital Status"
        ),
        gr.Dropdown(
            ["Tech-support", "Craft-repair", "Other-service", "Sales",
             "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
             "Machine-op-inspct", "Adm-clerical", "Farming-fishing",
             "Transport-moving", "Priv-house-serv", "Protective-serv",
             "Armed-Forces"],
            label="Occupation"
        ),
        gr.Dropdown(
            ["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
            label="Relationship"
        ),
        gr.Dropdown(
            ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
            label="Race"
        ),
        gr.Dropdown(
            ["Male", "Female"],
            label="Gender"
        ),
        # Capital gain/loss come before hours-per-week to match predict().
        gr.Slider(0, 100000, step=100, label="Capital Gain"),
        gr.Slider(0, 5000, step=50, label="Capital Loss"),
        gr.Slider(1, 90, step=1, label="Hours Per Week"),
        gr.Dropdown(
            ["United-States", "Other"],
            label="Native Country"
        )
    ],
    outputs="text",
    title="Adult Income Predictor"
)

# Launch the app
interface.launch()