# Spaces: Runtime error
# (Status text captured together with the code; it is not part of the program.)
import pandas as pd | |
import numpy as np | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras.layers import Dense, Dropout, Input | |
from tensorflow.keras.callbacks import EarlyStopping | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
from sklearn.compose import ColumnTransformer | |
from sklearn.pipeline import Pipeline | |
import joblib | |
def load_data(file_path, **read_csv_kwargs):
    """
    Loads a CSV file into a pandas DataFrame.
    Parameters:
    file_path (str or path-like): Location of the CSV file to read.
    **read_csv_kwargs: Optional keyword arguments forwarded unchanged to pandas.read_csv
        (for example usecols or dtype). With none given, the pandas defaults apply.
    Returns:
    data (pandas.DataFrame): The parsed contents of the file.
    """
    return pd.read_csv(file_path, **read_csv_kwargs)
def preprocess_data(data, selected_features, categorical_features, numerical_features,
                    target_column='fuel_burn_total', test_size=0.2, random_state=42):
    """
    Splits the data into train/test sets and fits the feature preprocessor on the training part.
    Parameters:
    data (pandas.DataFrame): Table containing the feature columns and the target column.
    selected_features (list of str): Columns used as model inputs.
    categorical_features (list of str): Columns that are one-hot encoded.
    numerical_features (list of str): Columns that are standardized.
    target_column (str, optional): Name of the regression target column. Default is 'fuel_burn_total'.
    test_size (float, optional): Fraction of rows held out for testing. Default is 0.2.
    random_state (int, optional): Seed passed to train_test_split. Default is 42.
    Returns:
    X_train, X_test: Transformed feature matrices (dense arrays).
    y_train, y_test: Target values for the two splits, as returned by train_test_split.
    preprocessor (sklearn.compose.ColumnTransformer): The transformer fitted on the training split.
    Raises:
    KeyError: If any selected feature or the target column is missing from data.
    """
    # Fail early with a message that names the missing columns.
    missing = [column for column in [*selected_features, target_column] if column not in data.columns]
    if missing:
        raise KeyError(f'Columns missing from data: {missing}')

    # Define preprocessing pipelines
    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())
    ])
    categorical_transformer = Pipeline(steps=[
        ('encoder', OneHotEncoder(handle_unknown='ignore'))
    ])

    # Combine preprocessing steps.
    # sparse_threshold=0 forces a dense result: with sparse one-hot output the default
    # setting can return a SciPy sparse matrix, which Keras' validation_split
    # is not documented to accept.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numerical_features),
            ('cat', categorical_transformer, categorical_features)
        ],
        sparse_threshold=0)

    # Split the datasets
    X = data[selected_features]
    y = data[target_column]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Fit on the training split only so that no test-set statistics leak into the scaler/encoder.
    X_train = preprocessor.fit_transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test, y_train, y_test, preprocessor
def build_model(input_shape):
    """
    Creates and compiles the feed-forward regression network.
    Parameters:
    input_shape (int): Number of input features per sample.
    Returns:
    model (tensorflow.keras.Model): Compiled Sequential model with two 64-unit ReLU hidden
        layers and a single-unit output layer, using the Adam optimizer, mean squared error
        loss and a mean absolute error metric.
    """
    # Assemble the layer stack first, then hand it to Sequential in one go.
    layer_stack = [Input(shape=(input_shape,))]
    layer_stack.extend(Dense(64, activation='relu') for _ in range(2))
    layer_stack.append(Dense(1))

    network = Sequential(layer_stack)
    network.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return network
def train_model(model, X_train, y_train, epochs=50, batch_size=32, patience=10, validation_split=0.2):
    """
    Trains the provided model using the training data, with early stopping.
    Parameters:
    model (tensorflow.keras.Model): The model to be trained.
    X_train (numpy.ndarray): The training data.
    y_train (numpy.ndarray): The target values for the training data.
    epochs (int, optional): The number of epochs to train the model. Default is 50.
    batch_size (int, optional): The number of samples per gradient update. Default is 32.
    patience (int, optional): Number of epochs with no improvement after which training will be stopped. Default is 10.
    validation_split (float, optional): Fraction of the training data to be used as validation data. Default is 0.2.
        When it is 0 (or otherwise falsy), early stopping watches the training loss instead.
    Returns:
    model (tensorflow.keras.Model): The trained model.
    history (tensorflow.keras.callbacks.History): A record of training loss values and metrics values at successive epochs.
    """
    # Without a validation split Keras never reports 'val_loss', so a callback watching it
    # could neither stop training nor restore the best weights. Fall back to 'loss' then.
    monitored_metric = 'val_loss' if validation_split else 'loss'
    early_stopping = EarlyStopping(monitor=monitored_metric, patience=patience, restore_best_weights=True)
    history = model.fit(
        X_train,
        y_train,
        validation_split=validation_split,
        epochs=epochs,
        callbacks=[early_stopping],
        batch_size=batch_size,
    )
    return model, history
def evaluate_model(model, X_test, y_test):
    """
    Computes the mean absolute error and root mean squared error on held-out data.
    Parameters:
    model: Object with a predict(X) method returning one prediction per sample.
    X_test: Feature matrix passed to model.predict.
    y_test (array-like): True target values; a flat sequence or a single-column 2-D array.
    Returns:
    mae (float): Mean absolute error between y_test and the predictions.
    rmse (float): Root mean squared error between y_test and the predictions.
    Raises:
    ValueError: If the number of predictions differs from the number of target values.
    """
    # Flatten both sides: subtracting (n,) predictions from an (n, 1) target column would
    # otherwise broadcast to an (n, n) matrix and silently produce wrong metrics.
    y_pred = np.asarray(model.predict(X_test)).ravel()
    y_true = np.asarray(y_test).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f'Got {y_pred.shape[0]} predictions for {y_true.shape[0]} target values')

    errors = y_true - y_pred
    mae = np.mean(np.abs(errors))
    rmse = np.sqrt(np.mean(errors ** 2))
    return mae, rmse
def save_model(model, preprocessor, model_path, preprocessor_path):
    """
    Saves the trained model and the fitted preprocessor to disk.
    Parameters:
    model: Object with a save(path) method (the trained Keras model).
    preprocessor: Fitted preprocessing object, serialized with joblib.dump.
    model_path (str or path-like): Destination file for the model.
    preprocessor_path (str or path-like): Destination file for the preprocessor.
    """
    import os  # local import: at module level os is only imported under the __main__ guard

    # Create missing output directories first, so that a finished training run is not lost
    # to a failed save just because the target folder was never created.
    for target in (model_path, preprocessor_path):
        try:
            target_str = os.fsdecode(target)
        except TypeError:
            continue  # not a filesystem path (e.g. an open file object); nothing to create
        if '://' in target_str:
            continue  # URL-style destination; do not create a local directory for it
        parent_dir = os.path.dirname(target_str)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    model.save(model_path)
    joblib.dump(preprocessor, preprocessor_path)
def main():
    """
    Runs the whole pipeline: load the CSV, preprocess, build and train the network,
    print the test-set MAE and RMSE, and save the model and preprocessor.
    All file locations are relative paths, so they resolve against the current working directory.
    """
    # Feature configuration.
    categorical_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'J/T', 'CAT']
    numerical_features = ['seats', 'distance', 'dist']
    selected_features = [
        'Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer',
        'seats', 'distance', 'J/T', 'CAT', 'dist',
    ]

    # Data loading and preparation.
    dataset = load_data('../../datasets/preprocessed_data.csv')
    prepared = preprocess_data(dataset, selected_features, categorical_features, numerical_features)
    X_train, X_test, y_train, y_test, preprocessor = prepared

    # Model construction and training; the returned training history is not used here.
    network = build_model(X_train.shape[1])
    network, _history = train_model(network, X_train, y_train)

    # Evaluation on the held-out split.
    mae, rmse = evaluate_model(network, X_test, y_test)
    print(f'MAE: {mae}')
    print(f'RMSE: {rmse}')

    # Persist both artifacts.
    save_model(network, preprocessor, '../../saved_models/nn_model.keras', '../../saved_models/nn_preprocessor.pkl')
# Script entry point: runs only when the file is executed directly, not when imported.
if __name__ == "__main__":
    import os

    # Set the TensorFlow oneDNN switch to '0' before starting the pipeline.
    # NOTE(review): tensorflow is already imported at the top of the module, so this
    # may be set too late to take effect - confirm, and move it before the imports if so.
    os.environ.update({'TF_ENABLE_ONEDNN_OPTS': '0'})
    main()