poudel committed on
Commit
5728f18
1 Parent(s): 654d76b

Delete neural_network

Browse files
neural_network/__pycache__/inference.cpython-39.pyc DELETED
Binary file (2.53 kB)
 
neural_network/inference.py DELETED
@@ -1,76 +0,0 @@
1
- import joblib
2
- import numpy as np
3
- import pandas as pd
4
- import tensorflow as tf
5
-
6
-
7
def load_data(path):
    """Load the preprocessed dataset from *path*.

    Parameters:
        path: CSV file path (or file-like object) to read.

    Returns:
        pandas.DataFrame with the leading index column removed.
    """
    # Bug fix: the original ignored *path* and read a hard-coded absolute
    # path on the author's machine.
    df = pd.read_csv(path)
    df = df.drop(df.columns[0], axis=1)  # drop the leading 'Unnamed: 0' index column
    return df
11
-
12
-
13
def load_model_and_preprocessor(model_path, preprocessor_path):
    """Load the trained Keras model and its fitted preprocessor.

    Parameters:
        model_path: path to the saved ``.keras`` model file.
        preprocessor_path: path to the joblib-pickled preprocessor.

    Returns:
        ``(model, preprocessor)`` tuple.
    """
    # Bug fix: the original ignored both arguments and loaded from
    # hard-coded absolute paths on the author's machine.
    loaded_model = tf.keras.models.load_model(model_path)
    preprocessor = joblib.load(preprocessor_path)
    return loaded_model, preprocessor
17
-
18
-
19
def select_features(df, selected_features):
    """Split *df* into the chosen feature columns and the fuel-burn target.

    Returns:
        ``(X_test, y_test)`` — feature frame and 'fuel_burn_total' Series.
    """
    features = df[selected_features]
    target = df['fuel_burn_total']
    return features, target
23
-
24
-
25
def preprocess_data(preprocessor, X_test):
    """Apply the already-fitted *preprocessor* to the test features."""
    return preprocessor.transform(X_test)
28
-
29
-
30
def predict_in_batches(loaded_model, X_test_processed, y_test, batch_size=32):
    """Score the model batch by batch and report relative-error accuracy.

    Parameters:
        loaded_model: trained model exposing ``predict``.
        X_test_processed: 2-D array of preprocessed test features.
        y_test: pandas Series of true targets. NOTE(review): accuracy is
            1 - mean(|y - pred| / y), so zero targets would divide by zero —
            assumes fuel-burn totals are strictly positive; confirm.
        batch_size: samples per prediction batch (default 32, matching the
            original call site).

    Returns:
        Sample-weighted average accuracy (also printed); 0.0 for empty input.
        (The original printed only and returned None — returning the value
        is backward compatible.)
    """
    n_samples = X_test_processed.shape[0]
    if n_samples == 0:
        # Bug fix: the original divided by zero on empty input.
        print('Average Accuracy: 0.00%')
        return 0.0

    # Ceiling division so a final partial batch is counted.
    num_batches = -(-n_samples // batch_size)
    total_accuracy = 0.0

    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min(start_index + batch_size, n_samples)
        batch_X = X_test_processed[start_index:end_index]
        batch_y = y_test.iloc[start_index:end_index]

        # Make predictions with the loaded final model.
        batch_predictions = loaded_model.predict(batch_X)

        # Accuracy = 1 - mean relative error over this batch.
        batch_accuracy = 1 - np.mean(np.abs(batch_y.values - batch_predictions[:, 0]) / batch_y.values)
        # Weight by actual batch length so a short last batch counts proportionally.
        total_accuracy += batch_accuracy * (end_index - start_index)

        print(f'Batch {batch_num + 1}/{num_batches} - Accuracy: {batch_accuracy:.2%}')

    average_accuracy = total_accuracy / n_samples
    print(f'Average Accuracy: {average_accuracy:.2%}')
    return average_accuracy
51
-
52
-
53
def main():
    """Inference entry point: load artifacts, select features, score in batches."""
    # Load the preprocessed evaluation dataset.
    data = load_data('/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/datasets/preprocessed_data.csv')

    # Restore the trained network and the preprocessor it was fitted with.
    model, preprocessor = load_model_and_preprocessor(
        '/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/saved_models/nn_model.keras',
        '/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/saved_models/nn_preprocessor.pkl')

    # Columns the preprocessor expects as input.
    selected_features = [
        'Origin_Airport', 'Destination_Airport', 'Operating_Airline', 'model', '_Manufacturer',
        'seats', 'distance', '_Operating_Airline_ASK_(Millions)', 'FLIGHT_ID', 'FFLOW_KGM',
        'J/T', 'CAT', 'dist', 'mean_taxi_in'
    ]
    # Select only the relevant features, transform them, then score.
    X_test, y_test = select_features(data, selected_features)
    X_test_processed = preprocess_data(preprocessor, X_test)
    predict_in_batches(model, X_test_processed, y_test, batch_size=32)
70
-
71
-
72
if __name__ == "__main__":
    import os

    # Disable TensorFlow's oneDNN custom ops (documented TF env switch).
    # NOTE(review): tensorflow is imported at module top, so setting this
    # here may be too late to take effect — confirm, or move before import.
    os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neural_network/model.py DELETED
@@ -1,118 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- from tensorflow.keras.models import Sequential
4
- from tensorflow.keras.layers import Dense, Dropout, Input
5
- from tensorflow.keras.callbacks import EarlyStopping
6
- from sklearn.model_selection import train_test_split
7
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
8
- from sklearn.compose import ColumnTransformer
9
- from sklearn.pipeline import Pipeline
10
- import joblib
11
-
12
-
13
def load_data(file_path):
    """Read the dataset CSV at *file_path* into a pandas DataFrame."""
    frame = pd.read_csv(file_path)
    return frame
15
-
16
-
17
def preprocess_data(data, selected_features, categorical_features, numerical_features):
    """Split *data* into train/test sets and fit/apply a column preprocessor.

    Numerical columns are standard-scaled; categorical columns are one-hot
    encoded (unknown categories ignored at transform time). The preprocessor
    is fitted on the training split only, then applied to the test split.

    Returns:
        ``(X_train, X_test, y_train, y_test, preprocessor)``
    """
    # Per-column-type preprocessing pipelines.
    scale_pipeline = Pipeline(steps=[
        ('scaler', StandardScaler())
    ])
    encode_pipeline = Pipeline(steps=[
        ('encoder', OneHotEncoder(handle_unknown='ignore'))
    ])

    # Route each column group through its own pipeline.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', scale_pipeline, numerical_features),
            ('cat', encode_pipeline, categorical_features)
        ])

    # Hold out 20% of the rows for evaluation (fixed seed for reproducibility).
    X = data[selected_features]
    y = data['fuel_burn_total']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit on the training split only; transform both splits.
    X_train = preprocessor.fit_transform(X_train)
    X_test = preprocessor.transform(X_test)

    return X_train, X_test, y_train, y_test, preprocessor
43
-
44
-
45
def build_model(input_shape):
    """Build and compile a small fully-connected regression network.

    Parameters:
        input_shape: number of input features.

    Returns:
        A compiled Keras Sequential model (Adam optimizer, MSE loss, MAE metric).
    """
    network = Sequential()
    network.add(Input(shape=(input_shape,)))
    network.add(Dense(64, activation='relu'))
    network.add(Dense(64, activation='relu'))
    network.add(Dense(1))  # single linear output for regression
    network.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return network
54
-
55
-
56
def train_model(model, X_train, y_train, epochs=50, batch_size=32, patience=10, validation_split=0.2):
    """Fit *model* on the training data with early stopping on validation loss.

    Parameters:
        model (tensorflow.keras.Model): the compiled model to train.
        X_train (numpy.ndarray): training features.
        y_train (numpy.ndarray): training targets.
        epochs (int, optional): maximum number of training epochs. Default 50.
        batch_size (int, optional): samples per gradient update. Default 32.
        patience (int, optional): epochs without val_loss improvement before
            stopping. Default 10.
        validation_split (float, optional): fraction of the training data held
            out for validation. Default 0.2.

    Returns:
        (model, history): the trained model and the Keras History record of
        per-epoch losses and metrics.
    """
    # Restore the best weights seen on the validation split, not the last ones.
    stopper = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
    history = model.fit(
        X_train,
        y_train,
        validation_split=validation_split,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[stopper],
    )
    return model, history
78
-
79
-
80
def evaluate_model(model, X_test, y_test):
    """Compute MAE and RMSE of *model*'s predictions on the test set.

    Returns:
        ``(mae, rmse)`` as floats.
    """
    predictions = model.predict(X_test).flatten()
    errors = y_test - predictions
    mae = np.mean(np.abs(errors))
    rmse = np.sqrt(np.mean(errors ** 2))
    return mae, rmse
85
-
86
-
87
def save_model(model, preprocessor, model_path, preprocessor_path):
    """Persist the trained model and its preprocessor to disk.

    Parameters:
        model: trained Keras model; saved via ``model.save`` to *model_path*.
        preprocessor: fitted transformer; joblib-pickled to *preprocessor_path*.
    """
    model.save(model_path)
    joblib.dump(preprocessor, preprocessor_path)
90
-
91
-
92
def main():
    """Training entry point: load data, fit the network, evaluate, persist."""
    dataset = load_data('../../datasets/preprocessed_data.csv')

    # Columns fed to the model.
    selected_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'seats', 'distance', 'J/T',
                         'CAT', 'dist']
    # How each input column is preprocessed.
    categorical_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'J/T', 'CAT']
    numerical_features = ['seats', 'distance', 'dist']

    X_train, X_test, y_train, y_test, preprocessor = preprocess_data(
        dataset, selected_features, categorical_features, numerical_features)

    network = build_model(X_train.shape[1])
    network, history = train_model(network, X_train, y_train)

    # Report held-out error before saving.
    mae, rmse = evaluate_model(network, X_test, y_test)
    print(f'MAE: {mae}')
    print(f'RMSE: {rmse}')

    save_model(network, preprocessor, '../../saved_models/nn_model.keras', '../../saved_models/nn_preprocessor.pkl')
112
-
113
-
114
if __name__ == "__main__":
    import os

    # Disable TensorFlow's oneDNN custom ops (documented TF env switch).
    # NOTE(review): tensorflow is imported at module top, so setting this
    # here may be too late to take effect — confirm, or move before import.
    os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neural_network/neural_network.py DELETED
@@ -1,161 +0,0 @@
1
- # # -*- coding: utf-8 -*-
2
- # """neural_network.ipynb
3
-
4
- # Automatically generated by Colab.
5
-
6
- # Original file is located at
7
- # https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
8
- # """
9
-
10
- # import pandas as pd
11
- # import requests
12
- # import numpy as np
13
- # from sklearn.model_selection import train_test_split
14
- # from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
15
- # from sklearn.preprocessing import StandardScaler, OneHotEncoder
16
- # from tensorflow.keras.models import Sequential
17
- # from tensorflow.keras.layers import Dense
18
- # from scipy.stats import f
19
-
20
- # # Neural Network model
21
- # # Note here I am using a new dataset which Abdulelah shared with me.
22
- # # dataa filename "preprocessed_data.csv"
23
-
24
- # dataset = pd.read_csv('/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/datasets/preprocessed_data.csv')
25
- # # using dataset Abdulelah gave me
26
-
27
- # dataset.dropna(inplace = True)
28
- # dataset.head()
29
-
30
-
31
- # feature = dataset[['distance', 'model', 'seats', 'fuel_burn', 'fuel_burn_total']]
32
- # target = dataset['fuel_burn_total']
33
-
34
- # feature = feature.copy()
35
- # feature.drop('model', axis=1, inplace=True)
36
-
37
- # # doing encoding
38
- # encoder = OneHotEncoder(sparse_output = False)
39
- # feature_encoded = pd.DataFrame(encoder.fit_transform(feature[['model']]))
40
- # feature_encoded.columns = encoder.get_feature_names_out(['model'])
41
- # feature.drop('model', axis = 1, inplace = True)
42
- # feature = pd.concat([feature.reset_index(drop = True), feature_encoded.reset_index(drop = True)], axis = 1)
43
-
44
-
45
- # feature_train, feature_test, target_train, target_test = train_test_split(feature, target, test_size = 0.1, random_state = 42) # split into train and test
46
- # scaler = StandardScaler()
47
- # feature_train_scaled = scaler.fit_transform(feature_train)
48
- # feature_test_scaled = scaler.transform(feature_test)
49
-
50
- # # building the model
51
- # model = Sequential([
52
- # Dense(64, activation = 'relu', input_shape = (feature_train_scaled.shape[1],)),
53
- # Dense(64, activation = 'relu'),
54
- # Dense(1)]) # can change dense
55
- # model.compile(optimizer = 'adam', loss = 'mean_squared_error') # compiling model
56
- # model.fit(feature_train_scaled, target_train, epochs = 50, batch_size = 32, verbose = 1) # training model
57
-
58
- # mse = model.evaluate(feature_test_scaled, target_test)
59
- # print("mean squared e", mse)
60
-
61
-
62
- # target_prediction = model.predict(feature_test_scaled)
63
- # r2 = r2_score(target_test, target_prediction)
64
- # mae = mean_absolute_error(target_test, target_prediction)
65
- # mse = mean_squared_error(target_test, target_prediction)
66
-
67
- # feature_we_want = len(target) # what we are looking for
68
- # regression = 1 # there is only one predictor
69
- # residual = feature_we_want - 2
70
- # explained_variance = r2 * np.sum((target - np.mean(target))**2)
71
- # unexplained_variance = mse * feature_we_want
72
-
73
- # F_value = (explained_variance / regression) / (unexplained_variance / residual) # calculating the F statistic for the report purposes
74
- # p_value = 1 - f.cdf(F_value, regression, residual)
75
- # rse = np.sqrt(mse)
76
-
77
- # print(f"Rquared {r2}")
78
- # print(f"mean absolute e {mae}")
79
- # print(f"mean squared e {mse}")
80
- # print(f"regression: {regression:.4f}")
81
- # print(f"residual: {residual:.4f}")
82
- # print(f"p-value: {p_value:.4f}") # calculating P value for the report
83
- # print(f"standard error: {rse:.2f}")
84
- # print(f"f-statistic: {F_value:.2f}")
85
- # # the mse difference between the predicted and actual fuel burn totals on the model is around 4.97, it it was lower it would be better
86
-
87
- # # mse is 0 now this is a good model !
88
-
89
"""Train a small dense network to predict total fuel burn and report fit statistics.

Flat script: loads the preprocessed dataset, one-hot encodes the aircraft
'model' column, scales features, trains a 2x64 ReLU network, then prints
regression metrics plus an F-statistic/p-value for reporting.
"""
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scipy.stats import f

# Load the dataset (hard-coded absolute path from the author's machine).
dataset = pd.read_csv('/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/datasets/preprocessed_data.csv')
dataset.dropna(inplace=True)

# Features and target.
# NOTE(review): the feature frame includes 'fuel_burn_total' — the target
# itself — and 'fuel_burn'; the network can trivially learn the identity
# (target leakage). Confirm this is intended.
features = dataset[['distance', 'model', 'seats', 'fuel_burn', 'fuel_burn_total']]
target = dataset['fuel_burn_total']

# Encoding the 'model' column (dense output so it concatenates cleanly).
encoder = OneHotEncoder(sparse_output=False)
model_encoded = pd.DataFrame(encoder.fit_transform(features[['model']]))
model_encoded.columns = encoder.get_feature_names_out(['model'])

# Drop the original 'model' column and add the encoded data.
# reset_index(drop=True) keeps the rows aligned after dropna above.
features = features.drop('model', axis=1)
features = pd.concat([features.reset_index(drop=True), model_encoded.reset_index(drop=True)], axis=1)

# Train-test split (fixed seed for reproducibility of the split).
feature_train, feature_test, target_train, target_test = train_test_split(features, target, test_size=0.1, random_state=42)

# Feature scaling: fit on train only, apply to both splits.
scaler = StandardScaler()
feature_train_scaled = scaler.fit_transform(feature_train)
feature_test_scaled = scaler.transform(feature_test)

# Neural network model: two hidden ReLU layers, one linear output.
model = Sequential([
    Dense(64, activation='relu', input_shape=(feature_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1)
])

# Compile and train the model.
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(feature_train_scaled, target_train, epochs=50, batch_size=32, verbose=1)

# Evaluate the model (loss == MSE, as compiled above).
mse = model.evaluate(feature_test_scaled, target_test)
print("Mean Squared Error:", mse)

# Predictions and performance metrics on the held-out test split.
target_prediction = model.predict(feature_test_scaled)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)
mse = mean_squared_error(target_test, target_prediction)

# Calculate F-statistic and p-value (for reporting purposes).
# NOTE(review): this mixes quantities from different samples — r2/mse come
# from the TEST split while n_samples and the variance sum use the FULL
# dataset — so the F/p values are not a standard regression F-test; verify
# before quoting them in a report.
n_samples = len(target)
n_predictors = feature_train_scaled.shape[1]
residual = n_samples - n_predictors - 1
explained_variance = r2 * np.sum((target - np.mean(target))**2)
unexplained_variance = mse * n_samples

F_value = (explained_variance / n_predictors) / (unexplained_variance / residual)
p_value = 1 - f.cdf(F_value, n_predictors, residual)
rse = np.sqrt(mse)

# Print the results.
print(f"R-squared: {r2}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"p-value: {p_value:.4f}")
print(f"Root Squared Error: {rse:.2f}")
print(f"F-statistic: {F_value:.2f}")