import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib


def load_data(file_path):
    return pd.read_csv(file_path)


def preprocess_data(data, selected_features, categorical_features, numerical_features):
    # Define preprocessing pipelines
    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())
    ])
    categorical_transformer = Pipeline(steps=[
        ('encoder', OneHotEncoder(handle_unknown='ignore'))
    ])

    # Combine preprocessing steps
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numerical_features),
            ('cat', categorical_transformer, categorical_features)
        ])

    # Split the datasets
    X = data[selected_features]
    y = data['fuel_burn_total']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Preprocess the datasets
    X_train = preprocessor.fit_transform(X_train)
    X_test = preprocessor.transform(X_test)

    return X_train, X_test, y_train, y_test, preprocessor


def build_model(input_shape):
    model = Sequential([
        Input(shape=(input_shape,)),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return model


def train_model(model, X_train, y_train, epochs=50, batch_size=32, patience=10, validation_split=0.2):
    """
        Trains the provided model using the training data.

        Parameters:
        model (tensorflow.keras.Model): The model to be trained.
        X_train (numpy.ndarray): The training data.
        y_train (numpy.ndarray): The target values for the training data.
        epochs (int, optional): The number of epochs to train the model. Default is 50.
        batch_size (int, optional): The number of samples per gradient update. Default is 32.
        patience (int, optional): Number of epochs with no improvement after which training will be stopped. Default is 10.
        validation_split (float, optional): Fraction of the training data to be used as validation data. Default is 0.2.

        Returns:
        model (tensorflow.keras.Model): The trained model.
        history (tensorflow.python.keras.callbacks.History): A record of training loss values and metrics values at successive epochs.
        """
    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
    history = model.fit(X_train, y_train, validation_split=validation_split, epochs=epochs, callbacks=[early_stopping],
                        batch_size=batch_size)

    return model, history


def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mae = np.mean(np.abs(y_test - y_pred.flatten()))
    rmse = np.sqrt(np.mean((y_test - y_pred.flatten()) ** 2))
    return mae, rmse


def save_model(model, preprocessor, model_path, preprocessor_path):
    model.save(model_path)
    joblib.dump(preprocessor, preprocessor_path)


def main():
    data = load_data('../../datasets/preprocessed_data.csv')
    selected_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'seats', 'distance', 'J/T',
                         'CAT', 'dist']

    categorical_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'J/T', 'CAT']
    numerical_features = ['seats', 'distance', 'dist']

    X_train, X_test, y_train, y_test, preprocessor = preprocess_data(data, selected_features, categorical_features,
                                                                     numerical_features)

    model = build_model(X_train.shape[1])

    model, history = train_model(model, X_train, y_train)

    mae, rmse = evaluate_model(model, X_test, y_test)
    print(f'MAE: {mae}')
    print(f'RMSE: {rmse}')

    save_model(model, preprocessor, '../../saved_models/nn_model.keras', '../../saved_models/nn_preprocessor.pkl')


if __name__ == "__main__":
    import os

    os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
    main()