Spaces:
Runtime error
Runtime error
Delete neural_network
Browse files
neural_network/__pycache__/inference.cpython-39.pyc
DELETED
Binary file (2.53 kB)
|
|
neural_network/inference.py
DELETED
@@ -1,76 +0,0 @@
|
|
1 |
-
import joblib
|
2 |
-
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
-
import tensorflow as tf
|
5 |
-
|
6 |
-
|
7 |
-
def load_data(path):
    """Load the preprocessed dataset from *path*.

    Parameters:
        path (str): Path to the preprocessed CSV file.

    Returns:
        pandas.DataFrame: Dataset with the leading index column removed.
    """
    # Use the caller-supplied path; the original ignored the `path`
    # argument and hard-coded an absolute local path.
    df = pd.read_csv(path)
    df = df.drop(df.columns[0], axis=1)  # drop the 'Unnamed: 0' index column
    return df
|
11 |
-
|
12 |
-
|
13 |
-
def load_model_and_preprocessor(model_path, preprocessor_path):
    """Load a trained Keras model and its fitted preprocessor.

    Parameters:
        model_path (str): Path to the saved ``.keras`` model file.
        preprocessor_path (str): Path to the joblib-pickled preprocessor.

    Returns:
        tuple: ``(loaded_model, preprocessor)``.
    """
    # Honour the arguments; the original ignored both parameters and
    # hard-coded absolute local paths instead.
    loaded_model = tf.keras.models.load_model(model_path)
    preprocessor = joblib.load(preprocessor_path)
    return loaded_model, preprocessor
|
17 |
-
|
18 |
-
|
19 |
-
def select_features(df, selected_features):
    """Split *df* into a feature matrix and the target series.

    Parameters:
        df (pandas.DataFrame): Full dataset; must contain 'fuel_burn_total'.
        selected_features (list[str]): Column names to keep as features.

    Returns:
        tuple: ``(X_test, y_test)`` — the feature frame and the target series.
    """
    return df[selected_features], df['fuel_burn_total']
|
23 |
-
|
24 |
-
|
25 |
-
def preprocess_data(preprocessor, X_test):
    """Apply the already-fitted *preprocessor* to the feature matrix.

    Parameters:
        preprocessor: Fitted transformer exposing ``transform``.
        X_test: Raw feature matrix to transform.

    Returns:
        The transformed feature matrix.
    """
    return preprocessor.transform(X_test)
|
28 |
-
|
29 |
-
|
30 |
-
def predict_in_batches(loaded_model, X_test_processed, y_test, batch_size):
    """Evaluate the model batch-by-batch and report per-batch accuracy.

    "Accuracy" here is 1 - mean relative absolute error, so it can be
    negative when predictions are far off; assumes y values are non-zero.

    Parameters:
        loaded_model: Trained model exposing ``predict(X)``.
        X_test_processed (numpy.ndarray): Preprocessed feature matrix.
        y_test (pandas.Series): True targets aligned with the rows of X.
        batch_size (int): Number of rows per prediction batch.

    Returns:
        float: Sample-weighted average accuracy over all batches (the
        original computed this but only printed it; returning it makes the
        function usable programmatically and is backward-compatible).
    """
    n_samples = X_test_processed.shape[0]
    # Ceiling division so a final partial batch is still processed.
    num_batches = -(-n_samples // batch_size)
    total_accuracy = 0.0

    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min(start_index + batch_size, n_samples)
        batch_X = X_test_processed[start_index:end_index]
        batch_y = y_test.iloc[start_index:end_index]

        # Make predictions with the loaded final model
        batch_predictions = loaded_model.predict(batch_X)

        # 1 - mean relative error for this batch.
        batch_accuracy = 1 - np.mean(np.abs(batch_y.values - batch_predictions[:, 0]) / batch_y.values)
        # Weight by batch size so a short final batch doesn't skew the mean.
        total_accuracy += batch_accuracy * (end_index - start_index)

        print(f'Batch {batch_num + 1}/{num_batches} - Accuracy: {batch_accuracy:.2%}')

    average_accuracy = total_accuracy / n_samples
    print(f'Average Accuracy: {average_accuracy:.2%}')
    return average_accuracy
|
51 |
-
|
52 |
-
|
53 |
-
def main():
    """Run the full inference pipeline: load data and model, preprocess, predict."""
    data_path = '/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/datasets/preprocessed_data.csv'
    model_path = '/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/saved_models/nn_model.keras'
    preprocessor_path = '/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/saved_models/nn_preprocessor.pkl'

    df = load_data(data_path)
    loaded_model, preprocessor = load_model_and_preprocessor(model_path, preprocessor_path)

    # Feature columns the preprocessor was fitted on.
    selected_features = [
        'Origin_Airport', 'Destination_Airport', 'Operating_Airline', 'model', '_Manufacturer',
        'seats', 'distance', '_Operating_Airline_ASK_(Millions)', 'FLIGHT_ID', 'FFLOW_KGM',
        'J/T', 'CAT', 'dist', 'mean_taxi_in'
    ]

    # Select only the relevant features, then transform and evaluate.
    X_test, y_test = select_features(df, selected_features)
    X_test_processed = preprocess_data(preprocessor, X_test)
    predict_in_batches(loaded_model, X_test_processed, y_test, batch_size=32)
|
70 |
-
|
71 |
-
|
72 |
-
if __name__ == "__main__":
    import os

    # Set before TensorFlow initialises; presumably disables oneDNN
    # fused-op optimisations to keep numerics consistent — TODO confirm.
    os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neural_network/model.py
DELETED
@@ -1,118 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
from tensorflow.keras.models import Sequential
|
4 |
-
from tensorflow.keras.layers import Dense, Dropout, Input
|
5 |
-
from tensorflow.keras.callbacks import EarlyStopping
|
6 |
-
from sklearn.model_selection import train_test_split
|
7 |
-
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
8 |
-
from sklearn.compose import ColumnTransformer
|
9 |
-
from sklearn.pipeline import Pipeline
|
10 |
-
import joblib
|
11 |
-
|
12 |
-
|
13 |
-
def load_data(file_path):
    """Read the CSV dataset at *file_path* into a DataFrame."""
    data = pd.read_csv(file_path)
    return data
|
15 |
-
|
16 |
-
|
17 |
-
def preprocess_data(data, selected_features, categorical_features, numerical_features):
    """Split *data* into train/test sets and fit preprocessing on the train split.

    Numerical columns are standard-scaled; categorical columns are one-hot
    encoded, with unknown categories at transform time ignored.

    Parameters:
        data (pandas.DataFrame): Full dataset containing 'fuel_burn_total'.
        selected_features (list[str]): Columns used as model inputs.
        categorical_features (list[str]): Subset of features to one-hot encode.
        numerical_features (list[str]): Subset of features to standard-scale.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, preprocessor)`` where the
        feature matrices are already transformed and *preprocessor* is the
        fitted ColumnTransformer.
    """
    # Combined preprocessing: scale numerics, one-hot encode categoricals.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', Pipeline(steps=[('scaler', StandardScaler())]), numerical_features),
            ('cat', Pipeline(steps=[('encoder', OneHotEncoder(handle_unknown='ignore'))]), categorical_features)
        ])

    # Split the dataset before fitting so the test split stays unseen.
    X_train, X_test, y_train, y_test = train_test_split(
        data[selected_features], data['fuel_burn_total'],
        test_size=0.2, random_state=42)

    # Fit scaling/encoding on the training split only, then apply to test.
    X_train = preprocessor.fit_transform(X_train)
    X_test = preprocessor.transform(X_test)

    return X_train, X_test, y_train, y_test, preprocessor
|
43 |
-
|
44 |
-
|
45 |
-
def build_model(input_shape):
    """Build and compile a small feed-forward regression network.

    Parameters:
        input_shape (int): Number of input features.

    Returns:
        tensorflow.keras.Model: Compiled model (Adam optimiser, MSE loss,
        MAE metric) with two 64-unit ReLU layers and a single linear output.
    """
    model = Sequential()
    model.add(Input(shape=(input_shape,)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return model
|
54 |
-
|
55 |
-
|
56 |
-
def train_model(model, X_train, y_train, epochs=50, batch_size=32, patience=10, validation_split=0.2):
    """Fit *model* on the training data with early stopping.

    Parameters:
        model (tensorflow.keras.Model): Model to train.
        X_train (numpy.ndarray): Training features.
        y_train (numpy.ndarray): Training targets.
        epochs (int, optional): Maximum number of training epochs. Default 50.
        batch_size (int, optional): Samples per gradient update. Default 32.
        patience (int, optional): Epochs without val_loss improvement before
            training stops. Default 10.
        validation_split (float, optional): Fraction of training data held out
            for validation. Default 0.2.

    Returns:
        tuple: ``(model, history)`` — the trained model and the Keras History
        record of loss/metric values per epoch.
    """
    # Stop once validation loss plateaus and roll back to the best weights.
    stopper = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
    history = model.fit(
        X_train,
        y_train,
        validation_split=validation_split,
        epochs=epochs,
        callbacks=[stopper],
        batch_size=batch_size,
    )
    return model, history
|
78 |
-
|
79 |
-
|
80 |
-
def evaluate_model(model, X_test, y_test):
    """Compute MAE and RMSE of *model* predictions on the test set.

    Parameters:
        model: Trained model exposing ``predict(X)``.
        X_test (numpy.ndarray): Test features.
        y_test (numpy.ndarray): True test targets.

    Returns:
        tuple: ``(mae, rmse)`` as floats.
    """
    errors = y_test - model.predict(X_test).flatten()
    mae = np.mean(np.abs(errors))
    rmse = np.sqrt(np.mean(errors ** 2))
    return mae, rmse
|
85 |
-
|
86 |
-
|
87 |
-
def save_model(model, preprocessor, model_path, preprocessor_path):
    """Persist the trained model (Keras format) and its fitted preprocessor (joblib).

    Parameters:
        model (tensorflow.keras.Model): Trained model to save.
        preprocessor: Fitted transformer to pickle alongside the model.
        model_path (str): Destination for the ``.keras`` model file.
        preprocessor_path (str): Destination for the joblib pickle.
    """
    joblib.dump(preprocessor, preprocessor_path)
    model.save(model_path)
|
90 |
-
|
91 |
-
|
92 |
-
def main():
    """Train, evaluate, and persist the fuel-burn regression network."""
    data = load_data('../../datasets/preprocessed_data.csv')

    selected_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'seats', 'distance', 'J/T',
                         'CAT', 'dist']
    categorical_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'J/T', 'CAT']
    numerical_features = ['seats', 'distance', 'dist']

    X_train, X_test, y_train, y_test, preprocessor = preprocess_data(
        data, selected_features, categorical_features, numerical_features)

    # Build, train, and score the network.
    network = build_model(X_train.shape[1])
    network, history = train_model(network, X_train, y_train)

    mae, rmse = evaluate_model(network, X_test, y_test)
    print(f'MAE: {mae}')
    print(f'RMSE: {rmse}')

    save_model(network, preprocessor, '../../saved_models/nn_model.keras', '../../saved_models/nn_preprocessor.pkl')
|
112 |
-
|
113 |
-
|
114 |
-
if __name__ == "__main__":
    import os

    # Set before TensorFlow initialises; presumably disables oneDNN
    # fused-op optimisations to keep numerics consistent — TODO confirm.
    os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neural_network/neural_network.py
DELETED
@@ -1,161 +0,0 @@
|
|
1 |
-
# # -*- coding: utf-8 -*-
|
2 |
-
# """neural_network.ipynb
|
3 |
-
|
4 |
-
# Automatically generated by Colab.
|
5 |
-
|
6 |
-
# Original file is located at
|
7 |
-
# https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
|
8 |
-
# """
|
9 |
-
|
10 |
-
# import pandas as pd
|
11 |
-
# import requests
|
12 |
-
# import numpy as np
|
13 |
-
# from sklearn.model_selection import train_test_split
|
14 |
-
# from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
|
15 |
-
# from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
16 |
-
# from tensorflow.keras.models import Sequential
|
17 |
-
# from tensorflow.keras.layers import Dense
|
18 |
-
# from scipy.stats import f
|
19 |
-
|
20 |
-
# # Neural Network model
|
21 |
-
# # Note here I am using a new dataset which Abdulelah shared with me.
|
22 |
-
# # dataa filename "preprocessed_data.csv"
|
23 |
-
|
24 |
-
# dataset = pd.read_csv('/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/datasets/preprocessed_data.csv')
|
25 |
-
# # using dataset Abdulelah gave me
|
26 |
-
|
27 |
-
# dataset.dropna(inplace = True)
|
28 |
-
# dataset.head()
|
29 |
-
|
30 |
-
|
31 |
-
# feature = dataset[['distance', 'model', 'seats', 'fuel_burn', 'fuel_burn_total']]
|
32 |
-
# target = dataset['fuel_burn_total']
|
33 |
-
|
34 |
-
# feature = feature.copy()
|
35 |
-
# feature.drop('model', axis=1, inplace=True)
|
36 |
-
|
37 |
-
# # doing encoding
|
38 |
-
# encoder = OneHotEncoder(sparse_output = False)
|
39 |
-
# feature_encoded = pd.DataFrame(encoder.fit_transform(feature[['model']]))
|
40 |
-
# feature_encoded.columns = encoder.get_feature_names_out(['model'])
|
41 |
-
# feature.drop('model', axis = 1, inplace = True)
|
42 |
-
# feature = pd.concat([feature.reset_index(drop = True), feature_encoded.reset_index(drop = True)], axis = 1)
|
43 |
-
|
44 |
-
|
45 |
-
# feature_train, feature_test, target_train, target_test = train_test_split(feature, target, test_size = 0.1, random_state = 42) # split into train and test
|
46 |
-
# scaler = StandardScaler()
|
47 |
-
# feature_train_scaled = scaler.fit_transform(feature_train)
|
48 |
-
# feature_test_scaled = scaler.transform(feature_test)
|
49 |
-
|
50 |
-
# # building the model
|
51 |
-
# model = Sequential([
|
52 |
-
# Dense(64, activation = 'relu', input_shape = (feature_train_scaled.shape[1],)),
|
53 |
-
# Dense(64, activation = 'relu'),
|
54 |
-
# Dense(1)]) # can change dense
|
55 |
-
# model.compile(optimizer = 'adam', loss = 'mean_squared_error') # compiling model
|
56 |
-
# model.fit(feature_train_scaled, target_train, epochs = 50, batch_size = 32, verbose = 1) # training model
|
57 |
-
|
58 |
-
# mse = model.evaluate(feature_test_scaled, target_test)
|
59 |
-
# print("mean squared e", mse)
|
60 |
-
|
61 |
-
|
62 |
-
# target_prediction = model.predict(feature_test_scaled)
|
63 |
-
# r2 = r2_score(target_test, target_prediction)
|
64 |
-
# mae = mean_absolute_error(target_test, target_prediction)
|
65 |
-
# mse = mean_squared_error(target_test, target_prediction)
|
66 |
-
|
67 |
-
# feature_we_want = len(target) # what we are looking for
|
68 |
-
# regression = 1 # there is only one predictor
|
69 |
-
# residual = feature_we_want - 2
|
70 |
-
# explained_variance = r2 * np.sum((target - np.mean(target))**2)
|
71 |
-
# unexplained_variance = mse * feature_we_want
|
72 |
-
|
73 |
-
# F_value = (explained_variance / regression) / (unexplained_variance / residual) # calculating the F statistic for the report purposes
|
74 |
-
# p_value = 1 - f.cdf(F_value, regression, residual)
|
75 |
-
# rse = np.sqrt(mse)
|
76 |
-
|
77 |
-
# print(f"Rquared {r2}")
|
78 |
-
# print(f"mean absolute e {mae}")
|
79 |
-
# print(f"mean squared e {mse}")
|
80 |
-
# print(f"regression: {regression:.4f}")
|
81 |
-
# print(f"residual: {residual:.4f}")
|
82 |
-
# print(f"p-value: {p_value:.4f}") # calculating P value for the report
|
83 |
-
# print(f"standard error: {rse:.2f}")
|
84 |
-
# print(f"f-statistic: {F_value:.2f}")
|
85 |
-
# # the mse difference between the predicted and actual fuel burn totals on the model is around 4.97, it it was lower it would be better
|
86 |
-
|
87 |
-
# # mse is 0 now this is a good model !
|
88 |
-
|
89 |
-
import pandas as pd
|
90 |
-
import numpy as np
|
91 |
-
from sklearn.model_selection import train_test_split
|
92 |
-
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
|
93 |
-
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
94 |
-
from tensorflow.keras.models import Sequential
|
95 |
-
from tensorflow.keras.layers import Dense
|
96 |
-
from scipy.stats import f
|
97 |
-
|
98 |
-
# Load the dataset
dataset = pd.read_csv('/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/datasets/preprocessed_data.csv')
dataset.dropna(inplace=True)

# Features and target.
# BUG FIX: the original feature list included 'fuel_burn_total' — the target
# itself — leaking the answer into the inputs (hence the legacy "mse is 0"
# note). It is excluded here so the model is evaluated honestly.
# NOTE(review): 'fuel_burn' may also be trivially related to the target —
# confirm with the dataset owner whether it should remain a feature.
features = dataset[['distance', 'model', 'seats', 'fuel_burn']]
target = dataset['fuel_burn_total']

# Encoding the 'model' column
encoder = OneHotEncoder(sparse_output=False)
model_encoded = pd.DataFrame(encoder.fit_transform(features[['model']]))
model_encoded.columns = encoder.get_feature_names_out(['model'])

# Drop the original 'model' column and add the encoded data
features = features.drop('model', axis=1)
features = pd.concat([features.reset_index(drop=True), model_encoded.reset_index(drop=True)], axis=1)

# Train-test split
feature_train, feature_test, target_train, target_test = train_test_split(features, target, test_size=0.1, random_state=42)

# Feature scaling (fit on the training split only to avoid test leakage)
scaler = StandardScaler()
feature_train_scaled = scaler.fit_transform(feature_train)
feature_test_scaled = scaler.transform(feature_test)

# Neural network model: two 64-unit ReLU layers, one linear output.
model = Sequential([
    Dense(64, activation='relu', input_shape=(feature_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1)
])

# Compile and train the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(feature_train_scaled, target_train, epochs=50, batch_size=32, verbose=1)

# Evaluate the model
mse = model.evaluate(feature_test_scaled, target_test)
print("Mean Squared Error:", mse)

# Predictions and performance metrics
target_prediction = model.predict(feature_test_scaled)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)
mse = mean_squared_error(target_test, target_prediction)

# Calculate F-statistic and p-value (for reporting purposes)
n_samples = len(target)
n_predictors = feature_train_scaled.shape[1]
residual = n_samples - n_predictors - 1
explained_variance = r2 * np.sum((target - np.mean(target))**2)
unexplained_variance = mse * n_samples

F_value = (explained_variance / n_predictors) / (unexplained_variance / residual)
p_value = 1 - f.cdf(F_value, n_predictors, residual)
rse = np.sqrt(mse)

# Print the results
print(f"R-squared: {r2}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"p-value: {p_value:.4f}")
print(f"Root Squared Error: {rse:.2f}")
print(f"F-statistic: {F_value:.2f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|