Spaces:
Runtime error
Runtime error
File size: 4,421 Bytes
f637442 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
def load_data(file_path):
    """
    Reads a CSV file into a pandas DataFrame.
    Parameters:
    file_path: Location of the CSV; handed unchanged to ``pandas.read_csv``.
    Returns:
    pandas.DataFrame: The parsed contents of the file.
    """
    frame = pd.read_csv(file_path)
    return frame
def preprocess_data(data, selected_features, categorical_features, numerical_features):
    """
    Splits the dataset into train/test parts and fits a feature preprocessor on the training part.
    Parameters:
    data (pandas.DataFrame): Full dataset; must contain the selected feature columns and 'fuel_burn_total'.
    selected_features (list): Column names used as model inputs.
    categorical_features (list): Columns that are one-hot encoded.
    numerical_features (list): Columns that are standardized.
    Returns:
    tuple: (X_train, X_test, y_train, y_test, preprocessor), where the two X values are the
    transformed feature matrices and preprocessor is the fitted ColumnTransformer.
    """
    # Hold out 20% of the rows; the fixed seed keeps the split reproducible.
    features = data[selected_features]
    target = data['fuel_burn_total']
    train_X, test_X, train_y, test_y = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # One sub-pipeline per column family: scaling for numeric columns,
    # one-hot encoding for categorical columns.
    scale_numeric = Pipeline(steps=[('scaler', StandardScaler())])
    encode_categorical = Pipeline(steps=[('encoder', OneHotEncoder(handle_unknown='ignore'))])
    column_steps = [
        ('num', scale_numeric, numerical_features),
        ('cat', encode_categorical, categorical_features),
    ]
    preprocessor = ColumnTransformer(transformers=column_steps)

    # Fit on the training rows only, then reuse the fitted transform for the test rows.
    train_matrix = preprocessor.fit_transform(train_X)
    test_matrix = preprocessor.transform(test_X)
    return train_matrix, test_matrix, train_y, test_y, preprocessor
def build_model(input_shape):
    """
    Creates and compiles a small fully connected regression network.
    Parameters:
    input_shape (int): Number of input features per sample.
    Returns:
    model: A compiled Sequential model with two 64-unit ReLU layers and one output unit,
    using the Adam optimizer, mean squared error loss and an MAE metric.
    """
    layer_stack = [
        Input(shape=(input_shape,)),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1),  # single output unit, no activation specified
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return network
def train_model(model, X_train, y_train, epochs=50, batch_size=32, patience=10, validation_split=0.2):
    """
    Fits the model on the training data, stopping early when validation loss stalls.
    Parameters:
    model: The compiled model to fit; its ``fit`` method is called.
    X_train: Training feature matrix.
    y_train: Target values matching X_train.
    epochs (int, optional): Upper bound on the number of training epochs. Default is 50.
    batch_size (int, optional): Samples per gradient update. Default is 32.
    patience (int, optional): Epochs without 'val_loss' improvement before stopping. Default is 10.
    validation_split (float, optional): Fraction of the training data held out for validation. Default is 0.2.
    Returns:
    model: The same model object after fitting.
    history: The object returned by ``model.fit``.
    """
    # restore_best_weights=True asks the callback to roll the model back to its best epoch.
    stopper = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
    fit_options = {
        'validation_split': validation_split,
        'epochs': epochs,
        'batch_size': batch_size,
        'callbacks': [stopper],
    }
    history = model.fit(X_train, y_train, **fit_options)
    return model, history
def evaluate_model(model, X_test, y_test):
    """
    Computes MAE and RMSE of the model's predictions on the test data.
    Parameters:
    model: Any object with a ``predict(X)`` method returning array-like predictions.
    X_test: Test feature matrix passed to ``model.predict``.
    y_test: True target values (pandas Series, 1-D array, or (n, 1) column).
    Returns:
    mae: Mean absolute error.
    rmse: Root mean squared error.
    Raises:
    ValueError: If the flattened targets and predictions differ in length.
    """
    # Flatten BOTH sides: subtracting an (n,) prediction vector from an (n, 1)
    # target column would broadcast to an (n, n) matrix and corrupt the metrics.
    predicted = np.asarray(model.predict(X_test)).ravel()
    actual = np.asarray(y_test).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f'y_test has {actual.size} values but the model produced {predicted.size} predictions'
        )
    residuals = actual - predicted
    mae = np.mean(np.abs(residuals))
    rmse = np.sqrt(np.mean(residuals ** 2))
    return mae, rmse
def save_model(model, preprocessor, model_path, preprocessor_path):
    """
    Saves the trained model and the fitted preprocessor to disk.
    Parameters:
    model: Object with a ``save(path)`` method.
    preprocessor: Fitted preprocessor, serialized with ``joblib.dump``.
    model_path: Destination passed to ``model.save``.
    preprocessor_path: Destination passed to ``joblib.dump``.
    """
    import os  # local import: this module otherwise imports os only under __main__

    # Create missing output directories first so a finished training run is not
    # lost to a missing-folder error. Non-path targets (e.g. an open file
    # object) are left untouched.
    for target in (model_path, preprocessor_path):
        if isinstance(target, (str, os.PathLike)):
            parent = os.path.dirname(os.fspath(target))
            if parent:
                os.makedirs(parent, exist_ok=True)
    model.save(model_path)
    joblib.dump(preprocessor, preprocessor_path)
def main():
    """
    Runs the whole workflow: load the dataset, preprocess it, train the network,
    print test MAE/RMSE, and save the model together with its preprocessor.
    """
    # Column roles; selected_features is the union of the two groups below.
    categorical_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'J/T', 'CAT']
    numerical_features = ['seats', 'distance', 'dist']
    selected_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'seats', 'distance', 'J/T',
                         'CAT', 'dist']

    data = load_data('../../datasets/preprocessed_data.csv')
    X_train, X_test, y_train, y_test, preprocessor = preprocess_data(
        data, selected_features, categorical_features, numerical_features
    )

    # The network's input width is the number of columns after preprocessing.
    model = build_model(X_train.shape[1])
    model, _history = train_model(model, X_train, y_train)

    mae, rmse = evaluate_model(model, X_test, y_test)
    print(f'MAE: {mae}')
    print(f'RMSE: {rmse}')

    save_model(model, preprocessor, '../../saved_models/nn_model.keras', '../../saved_models/nn_preprocessor.pkl')
if __name__ == "__main__":
    # Script entry point: set the TensorFlow oneDNN switch, then run the pipeline.
    import os

    # NOTE(review): tensorflow.keras is already imported at the top of this
    # module, before this assignment runs - confirm the flag still takes effect,
    # or set it before the TensorFlow import.
    os.environ.update({'TF_ENABLE_ONEDNN_OPTS': '0'})
    main()
|