Spaces:

matsammut
/

ICS5110-Applied_ML

Sleeping

App Files Community

matsammut committed on 22 days ago

Commit

aefb60f

verified ·

1 Parent(s): 93497bf

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -36

app.py CHANGED Viewed

@@ -11,12 +11,9 @@ import pickle
 import hdbscan
-# # Define the prediction function
 def predict_ann(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
-    # columns = {
-    # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
-    # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
-    # "hours-per-week":[hours_per_week], "native-country":[native_country]}
     columns = { "0":[0],
     "age": [age], "workclass":[workclass], "educational-num":[education], "occupation":[occupation],
     "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -24,19 +21,14 @@ def predict_ann(age, workclass, education, occupation, race, gender, capital_gai
     df = pd.DataFrame(data=columns)
     fixed_features = cleaning_features(df,race,False)
     print(fixed_features)
-    # with open('ann_model.pkl', 'rb') as ann_model_file:
-    #     ann_model = pickle.load(ann_model_file)
-    scaler = StandardScaler()
     ann_model = load_model('ann_model.h5')
     prediction = ann_model.predict(fixed_features)
-    # prediction = 1
     return "Income >50K" if prediction == 1 else "Income <=50K"
 def predict_rf(age, workclass, education,  occupation,  race, gender, capital_gain, capital_loss, hours_per_week, native_country):
-    # columns = {
-    # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
-    # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
-    # "hours-per-week":[hours_per_week], "native-country":[native_country]}
     columns = {
     "age": [age], "workclass":[workclass], "educational-num":[education], "occupation":[occupation],
     "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -44,12 +36,9 @@ def predict_rf(age, workclass, education,  occupation,  race, gender, capital_ga
     df = pd.DataFrame(data=columns)
     fixed_features = cleaning_features(df,race,False)
     print(fixed_features)
-    # with open('ann_model.pkl', 'rb') as ann_model_file:
-    #     ann_model = pickle.load(ann_model_file)
-    scaler = StandardScaler()
     rf_model = pickle.load(open('rf_model.pkl', 'rb'))
-    prediction = rf_model.predict(fixed_features)
-    # prediction = 1
     return "Income >50K" if prediction == 1 else "Income <=50K"
 def predict_hb(age, workclass, education,  occupation,  race, gender, capital_gain, capital_loss, hours_per_week, native_country):
@@ -66,23 +55,23 @@ def predict_hb(age, workclass, education,  occupation,  race, gender, capital_ga
     df = pd.DataFrame(data=columns)
     fixed_features = cleaning_features(df,race,True)
     print(fixed_features)
-    # with open('ann_model.pkl', 'rb') as ann_model_file:
-    #     ann_model = pickle.load(ann_model_file)
-    scaler = StandardScaler()
-    X = scaler.fit_transform(fixed_features)
-    clusterer = hdbscan.HDBSCAN(
-    min_cluster_size=220,
-    min_samples=117,
-    metric='euclidean',
-    cluster_selection_method='eom',
-    prediction_data=True,
-    cluster_selection_epsilon=0.28479667859306007
-    )
-    prediction = clusterer.fit_predict(X)
-    filename = 'hdbscan_model.pkl'
-    pickle.dump(clusterer, open(filename, 'wb'))
     return f"Predicted Cluster (HDBSCAN): {prediction[-1]}"
@@ -148,9 +137,9 @@ def cleaning_features(data,race,hdbscan):
     data = pca(data)
     if(hdbscan):
-        df_transformed = pd.read_csv('dataset.csv')
-        X = df_transformed.drop('income', axis=1)
-        data = pd.concat([X, data], ignore_index=True)
         data['capital-gain'] = np.log1p(data['capital-gain'])
         data['capital-loss'] = np.log1p(data['capital-loss'])
         scaler = joblib.load("robust_scaler.pkl")

 import hdbscan
 def predict_ann(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
     columns = { "0":[0],
     "age": [age], "workclass":[workclass], "educational-num":[education], "occupation":[occupation],
     "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     df = pd.DataFrame(data=columns)
     fixed_features = cleaning_features(df,race,False)
     print(fixed_features)
     ann_model = load_model('ann_model.h5')
     prediction = ann_model.predict(fixed_features)
     return "Income >50K" if prediction == 1 else "Income <=50K"
 def predict_rf(age, workclass, education,  occupation,  race, gender, capital_gain, capital_loss, hours_per_week, native_country):
     columns = {
     "age": [age], "workclass":[workclass], "educational-num":[education], "occupation":[occupation],
     "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     df = pd.DataFrame(data=columns)
     fixed_features = cleaning_features(df,race,False)
     print(fixed_features)
     rf_model = pickle.load(open('rf_model.pkl', 'rb'))
     return "Income >50K" if prediction == 1 else "Income <=50K"
 def predict_hb(age, workclass, education,  occupation,  race, gender, capital_gain, capital_loss, hours_per_week, native_country):
     df = pd.DataFrame(data=columns)
     fixed_features = cleaning_features(df,race,True)
     print(fixed_features)
+    hdb_model = pickle.load(open('hdbscan_model.pkl', 'rb'))
+    prediction = hdb_model.approximate_predict(fixed_features)
+    # scaler = StandardScaler()
+    # X = scaler.fit_transform(fixed_features)
+    # clusterer = hdbscan.HDBSCAN(
+    # min_cluster_size=220,
+    # min_samples=117,
+    # metric='euclidean',
+    # cluster_selection_method='eom',
+    # prediction_data=True,
+    # cluster_selection_epsilon=0.28479667859306007
+    # )
+    # prediction = clusterer.fit_predict(X)
+    # filename = 'hdbscan_model.pkl'
+    # pickle.dump(clusterer, open(filename, 'wb'))
     return f"Predicted Cluster (HDBSCAN): {prediction[-1]}"
     data = pca(data)
     if(hdbscan):
+        # df_transformed = pd.read_csv('dataset.csv')
+        # X = df_transformed.drop('income', axis=1)
+        # data = pd.concat([X, data], ignore_index=True)
         data['capital-gain'] = np.log1p(data['capital-gain'])
         data['capital-loss'] = np.log1p(data['capital-loss'])
         scaler = joblib.load("robust_scaler.pkl")