import joblib import numpy as np import pandas as pd import tensorflow as tf def load_data(path): df = pd.read_csv(path) df = df.drop(df.columns[0], axis=1) # drop the Unnamed: 0 column return df def load_model_and_preprocessor(model_path, preprocessor_path): loaded_model = tf.keras.models.load_model(model_path) preprocessor = joblib.load(preprocessor_path) return loaded_model, preprocessor def select_features(df, selected_features): X_test = df[selected_features] y_test = df['fuel_burn_total'] return X_test, y_test def preprocess_data(preprocessor, X_test): X_test_processed = preprocessor.transform(X_test) return X_test_processed def predict_in_batches(loaded_model, X_test_processed, y_test, batch_size): num_batches = X_test_processed.shape[0] // batch_size + int(X_test_processed.shape[0] % batch_size != 0) total_accuracy = 0 for batch_num in range(num_batches): start_index = batch_num * batch_size end_index = min(start_index + batch_size, X_test_processed.shape[0]) batch_X = X_test_processed[start_index:end_index] batch_y = y_test.iloc[start_index:end_index] # Make predictions with the loaded final model batch_predictions = loaded_model.predict(batch_X) # Calculate accuracy for the current batch batch_accuracy = 1 - np.mean(np.abs(batch_y.values - batch_predictions[:, 0]) / batch_y.values) total_accuracy += batch_accuracy * (end_index - start_index) print(f'Batch {batch_num + 1}/{num_batches} - Accuracy: {batch_accuracy:.2%}') average_accuracy = total_accuracy / X_test_processed.shape[0] print(f'Average Accuracy: {average_accuracy:.2%}') def main(): df = load_data('../../datasets/test.csv') loaded_model, preprocessor = load_model_and_preprocessor('../../saved_models/nn_model.keras', '../../saved_models/nn_preprocessor.pkl') selected_features = [ 'Origin_Airport', 'Destination_Airport', 'Operating_Airline', 'model', '_Manufacturer', 'seats', 'distance', '_Operating_Airline_ASK_(Millions)', 'FLIGHT_ID', 'FFLOW_KGM', 'J/T', 'CAT', 'dist', 'mean_taxi_in' ] # Select only the relevant features X_test, y_test = select_features(df, selected_features) X_test_processed = preprocess_data(preprocessor, X_test) predict_in_batches(loaded_model, X_test_processed, y_test, batch_size=32) if __name__ == "__main__": import os os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' main()