matsammut committed on
Commit
aefb60f
·
verified ·
1 Parent(s): 93497bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -36
app.py CHANGED
@@ -11,12 +11,9 @@ import pickle
11
  import hdbscan
12
 
13
 
14
- # # Define the prediction function
15
  def predict_ann(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
16
- # columns = {
17
- # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
18
- # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
19
- # "hours-per-week":[hours_per_week], "native-country":[native_country]}
20
  columns = { "0":[0],
21
  "age": [age], "workclass":[workclass], "educational-num":[education], "occupation":[occupation],
22
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -24,19 +21,14 @@ def predict_ann(age, workclass, education, occupation, race, gender, capital_gai
24
  df = pd.DataFrame(data=columns)
25
  fixed_features = cleaning_features(df,race,False)
26
  print(fixed_features)
27
- # with open('ann_model.pkl', 'rb') as ann_model_file:
28
- # ann_model = pickle.load(ann_model_file)
29
- scaler = StandardScaler()
30
  ann_model = load_model('ann_model.h5')
31
  prediction = ann_model.predict(fixed_features)
32
- # prediction = 1
33
  return "Income >50K" if prediction == 1 else "Income <=50K"
34
 
35
  def predict_rf(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
36
- # columns = {
37
- # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
38
- # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
39
- # "hours-per-week":[hours_per_week], "native-country":[native_country]}
40
  columns = {
41
  "age": [age], "workclass":[workclass], "educational-num":[education], "occupation":[occupation],
42
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -44,12 +36,9 @@ def predict_rf(age, workclass, education, occupation, race, gender, capital_ga
44
  df = pd.DataFrame(data=columns)
45
  fixed_features = cleaning_features(df,race,False)
46
  print(fixed_features)
47
- # with open('ann_model.pkl', 'rb') as ann_model_file:
48
- # ann_model = pickle.load(ann_model_file)
49
- scaler = StandardScaler()
50
  rf_model = pickle.load(open('rf_model.pkl', 'rb'))
51
- prediction = rf_model.predict(fixed_features)
52
- # prediction = 1
53
  return "Income >50K" if prediction == 1 else "Income <=50K"
54
 
55
  def predict_hb(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
@@ -66,23 +55,23 @@ def predict_hb(age, workclass, education, occupation, race, gender, capital_ga
66
  df = pd.DataFrame(data=columns)
67
  fixed_features = cleaning_features(df,race,True)
68
  print(fixed_features)
69
- # with open('ann_model.pkl', 'rb') as ann_model_file:
70
- # ann_model = pickle.load(ann_model_file)
71
- scaler = StandardScaler()
72
- X = scaler.fit_transform(fixed_features)
73
 
74
- clusterer = hdbscan.HDBSCAN(
75
- min_cluster_size=220,
76
- min_samples=117,
77
- metric='euclidean',
78
- cluster_selection_method='eom',
79
- prediction_data=True,
80
- cluster_selection_epsilon=0.28479667859306007
81
- )
82
 
83
- prediction = clusterer.fit_predict(X)
84
- filename = 'hdbscan_model.pkl'
85
- pickle.dump(clusterer, open(filename, 'wb'))
86
 
87
  return f"Predicted Cluster (HDBSCAN): {prediction[-1]}"
88
 
@@ -148,9 +137,9 @@ def cleaning_features(data,race,hdbscan):
148
 
149
  data = pca(data)
150
  if(hdbscan):
151
- df_transformed = pd.read_csv('dataset.csv')
152
- X = df_transformed.drop('income', axis=1)
153
- data = pd.concat([X, data], ignore_index=True)
154
  data['capital-gain'] = np.log1p(data['capital-gain'])
155
  data['capital-loss'] = np.log1p(data['capital-loss'])
156
  scaler = joblib.load("robust_scaler.pkl")
 
11
  import hdbscan
12
 
13
 
14
+
15
  def predict_ann(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
16
+
 
 
 
17
  columns = { "0":[0],
18
  "age": [age], "workclass":[workclass], "educational-num":[education], "occupation":[occupation],
19
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
 
21
  df = pd.DataFrame(data=columns)
22
  fixed_features = cleaning_features(df,race,False)
23
  print(fixed_features)
24
+
 
 
25
  ann_model = load_model('ann_model.h5')
26
  prediction = ann_model.predict(fixed_features)
27
+
28
  return "Income >50K" if prediction == 1 else "Income <=50K"
29
 
30
  def predict_rf(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
31
+
 
 
 
32
  columns = {
33
  "age": [age], "workclass":[workclass], "educational-num":[education], "occupation":[occupation],
34
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
 
36
  df = pd.DataFrame(data=columns)
37
  fixed_features = cleaning_features(df,race,False)
38
  print(fixed_features)
39
+
 
 
40
  rf_model = pickle.load(open('rf_model.pkl', 'rb'))
41
+
 
42
  return "Income >50K" if prediction == 1 else "Income <=50K"
43
 
44
  def predict_hb(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
 
55
  df = pd.DataFrame(data=columns)
56
  fixed_features = cleaning_features(df,race,True)
57
  print(fixed_features)
58
+ hdb_model = pickle.load(open('hdbscan_model.pkl', 'rb'))
59
+ prediction = hdb_model.approximate_predict(fixed_features)
60
+ # scaler = StandardScaler()
61
+ # X = scaler.fit_transform(fixed_features)
62
 
63
+ # clusterer = hdbscan.HDBSCAN(
64
+ # min_cluster_size=220,
65
+ # min_samples=117,
66
+ # metric='euclidean',
67
+ # cluster_selection_method='eom',
68
+ # prediction_data=True,
69
+ # cluster_selection_epsilon=0.28479667859306007
70
+ # )
71
 
72
+ # prediction = clusterer.fit_predict(X)
73
+ # filename = 'hdbscan_model.pkl'
74
+ # pickle.dump(clusterer, open(filename, 'wb'))
75
 
76
  return f"Predicted Cluster (HDBSCAN): {prediction[-1]}"
77
 
 
137
 
138
  data = pca(data)
139
  if(hdbscan):
140
+ # df_transformed = pd.read_csv('dataset.csv')
141
+ # X = df_transformed.drop('income', axis=1)
142
+ # data = pd.concat([X, data], ignore_index=True)
143
  data['capital-gain'] = np.log1p(data['capital-gain'])
144
  data['capital-loss'] = np.log1p(data['capital-loss'])
145
  scaler = joblib.load("robust_scaler.pkl")