# poudel's picture
# Upload 11 files
# f637442 verified
# raw
# history blame
# No virus
# 4.42 kB
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
def load_data(file_path):
    """
    Read a CSV file into a DataFrame.
    Parameters:
    file_path (str or file-like): Location of the CSV data, passed straight to ``pandas.read_csv``.
    Returns:
    pandas.DataFrame: The parsed table.
    """
    frame = pd.read_csv(file_path)
    return frame
def preprocess_data(data, selected_features, categorical_features, numerical_features, *,
                    target_column='fuel_burn_total', test_size=0.2, random_state=42):
    """
    Splits the data into train/test sets and fits the feature preprocessor on the training part.
    Parameters:
    data (pandas.DataFrame): Table containing the feature columns and the target column.
    selected_features (list): Names of the columns used as model inputs.
    categorical_features (list): Names of the columns to one-hot encode.
    numerical_features (list): Names of the columns to standardise.
    target_column (str, optional): Name of the column to predict. Default is 'fuel_burn_total'.
    test_size (float, optional): Fraction of rows held out for testing. Default is 0.2.
    random_state (int, optional): Seed for the train/test shuffle. Default is 42.
    Returns:
    X_train: Transformed training features.
    X_test: Transformed test features.
    y_train: Target values for the training rows.
    y_test: Target values for the test rows.
    preprocessor (sklearn.compose.ColumnTransformer): The transformer fitted on the training rows.
    """
    # Each transformer stays wrapped in a one-step Pipeline so the layout of
    # the fitted (and later pickled) preprocessor is unchanged.
    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler()),
    ])
    # handle_unknown='ignore' keeps transform() from failing on categories
    # that only occur outside the training split.
    categorical_transformer = Pipeline(steps=[
        ('encoder', OneHotEncoder(handle_unknown='ignore')),
    ])

    # NOTE(review): columns listed in selected_features but in neither
    # numerical_features nor categorical_features are presumably dropped by
    # the ColumnTransformer's default remainder handling -- confirm.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numerical_features),
            ('cat', categorical_transformer, categorical_features),
        ])

    X = data[selected_features]
    y = data[target_column]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Fit on the training rows only, then reuse the fitted statistics and
    # categories for the test rows so no test information leaks into the fit.
    X_train = preprocessor.fit_transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test, y_train, y_test, preprocessor
def build_model(input_shape):
    """
    Builds and compiles the feed-forward regression network.
    Parameters:
    input_shape (int): Number of input features per sample.
    Returns:
    model (tensorflow.keras.Model): Compiled model with two 64-unit ReLU hidden layers and a single linear output.
    """
    layers = [Input(shape=(input_shape,))]
    # Two identical hidden layers.
    layers.extend(Dense(64, activation='relu') for _ in range(2))
    # Single unit with no activation: one regression value per sample.
    layers.append(Dense(1))

    network = Sequential(layers)
    network.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return network
def train_model(model, X_train, y_train, epochs=50, batch_size=32, patience=10, validation_split=0.2):
    """
    Fits the given model on the training data with early stopping on validation loss.
    Parameters:
    model (tensorflow.keras.Model): The compiled model to fit.
    X_train: The training features.
    y_train: The target values for the training features.
    epochs (int, optional): Maximum number of epochs to run. Default is 50.
    batch_size (int, optional): Number of samples per gradient update. Default is 32.
    patience (int, optional): Epochs without 'val_loss' improvement before training stops. Default is 10.
    validation_split (float, optional): Fraction of the training data passed to ``fit`` as validation data. Default is 0.2.
    Returns:
    model (tensorflow.keras.Model): The same model object after fitting.
    history: The object returned by ``model.fit``.
    """
    # restore_best_weights=True is requested so the model is not left at the
    # weights of the final (possibly worse) epoch.
    stopper = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
    fit_options = {
        'validation_split': validation_split,
        'epochs': epochs,
        'callbacks': [stopper],
        'batch_size': batch_size,
    }
    history = model.fit(X_train, y_train, **fit_options)
    return model, history
def evaluate_model(model, X_test, y_test):
    """
    Computes the mean absolute error and root mean squared error of the model's predictions.
    Parameters:
    model: Object exposing ``predict(X_test)`` that returns one prediction per sample.
    X_test: The features to predict on.
    y_test (array-like): The true target values, either 1-D or a single column.
    Returns:
    mae (float): Mean absolute error.
    rmse (float): Root mean squared error.
    Raises:
    ValueError: If the number of predictions differs from the number of targets.
    """
    predicted = np.asarray(model.predict(X_test)).ravel()
    # Flatten the targets as well: subtracting a flat prediction vector from
    # an (n, 1) target column would broadcast to an (n, n) matrix and yield
    # silently wrong metrics.
    actual = np.asarray(y_test).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f'Number of targets ({actual.size}) does not match number of predictions ({predicted.size})')

    residuals = actual - predicted
    mae = np.mean(np.abs(residuals))
    rmse = np.sqrt(np.mean(residuals ** 2))
    return mae, rmse
def save_model(model, preprocessor, model_path, preprocessor_path):
    """
    Saves the trained model and the fitted preprocessor to disk.
    Missing parent directories of local destination paths are created first, so a
    finished training run is not lost to a missing output folder.
    Parameters:
    model: Object exposing ``save(path)``; receives ``model_path``.
    preprocessor: Fitted preprocessor, serialised with ``joblib.dump``.
    model_path (str or os.PathLike): Destination for the model.
    preprocessor_path (str or os.PathLike): Destination for the preprocessor.
    """
    import os

    for destination in (model_path, preprocessor_path):
        # Only plain filesystem paths are touched; other destination kinds
        # (e.g. open file objects) are passed through unchanged.
        if not isinstance(destination, (str, os.PathLike)):
            continue
        location = os.fspath(destination)
        # Skip URI-style destinations such as 'scheme://...': creating a local
        # directory for those would be wrong.
        if isinstance(location, str) and '://' in location:
            continue
        parent = os.path.dirname(location)
        if parent:
            os.makedirs(parent, exist_ok=True)

    model.save(model_path)
    joblib.dump(preprocessor, preprocessor_path)
def main():
    """
    Runs the whole pipeline: load the dataset, preprocess it, train the network,
    print the test MAE/RMSE and save the model together with its preprocessor.
    """
    # Column roles; selected_features lists every column handed to the preprocessor.
    numerical_features = ['seats', 'distance', 'dist']
    categorical_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'J/T', 'CAT']
    selected_features = [
        'Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'seats', 'distance', 'J/T',
        'CAT', 'dist',
    ]

    frame = load_data('../../datasets/preprocessed_data.csv')
    splits = preprocess_data(frame, selected_features, categorical_features, numerical_features)
    X_train, X_test, y_train, y_test, preprocessor = splits

    # The network's input width is the number of columns after preprocessing.
    network = build_model(X_train.shape[1])
    network, _history = train_model(network, X_train, y_train)

    mae, rmse = evaluate_model(network, X_test, y_test)
    print(f'MAE: {mae}')
    print(f'RMSE: {rmse}')

    save_model(
        network,
        preprocessor,
        '../../saved_models/nn_model.keras',
        '../../saved_models/nn_preprocessor.pkl',
    )
# Script entry point: configure the environment, then run the pipeline.
if __name__ == "__main__":
    import os

    # NOTE(review): TensorFlow is already imported at the top of this file by
    # the time this line runs; the flag may need to be set before that import
    # to have any effect -- confirm.
    os.environ.update({'TF_ENABLE_ONEDNN_OPTS': '0'})
    main()