Spaces:
Runtime error
Runtime error
Delete neural_network
Browse files
neural_network/__pycache__/inference.cpython-39.pyc
DELETED
Binary file (2.53 kB)
|
|
neural_network/inference.py
DELETED
@@ -1,76 +0,0 @@
|
|
1 |
-
import joblib
|
2 |
-
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
-
import tensorflow as tf
|
5 |
-
|
6 |
-
|
7 |
-
def load_data(path):
    """Load the preprocessed dataset from *path*.

    Parameters:
        path (str): Path to the preprocessed CSV file.

    Returns:
        pandas.DataFrame: Dataset with the leading index column removed.
    """
    # Use the caller-supplied path; the original ignored the `path`
    # argument and hard-coded an absolute local path.
    df = pd.read_csv(path)
    df = df.drop(df.columns[0], axis=1)  # drop the 'Unnamed: 0' index column
    return df
|
11 |
-
|
12 |
-
|
13 |
-
def load_model_and_preprocessor(model_path, preprocessor_path):
    """Load a trained Keras model and its fitted preprocessor.

    Parameters:
        model_path (str): Path to the saved ``.keras`` model file.
        preprocessor_path (str): Path to the joblib-pickled preprocessor.

    Returns:
        tuple: ``(loaded_model, preprocessor)``.
    """
    # Honour the arguments; the original ignored both parameters and
    # hard-coded absolute local paths instead.
    loaded_model = tf.keras.models.load_model(model_path)
    preprocessor = joblib.load(preprocessor_path)
    return loaded_model, preprocessor
|
17 |
-
|
18 |
-
|
19 |
-
def select_features(df, selected_features):
    """Split *df* into a feature matrix and the target series.

    Parameters:
        df (pandas.DataFrame): Full dataset; must contain 'fuel_burn_total'.
        selected_features (list[str]): Column names to keep as features.

    Returns:
        tuple: ``(X_test, y_test)`` — the feature frame and the target series.
    """
    return df[selected_features], df['fuel_burn_total']
|
23 |
-
|
24 |
-
|
25 |
-
def preprocess_data(preprocessor, X_test):
    """Apply the already-fitted *preprocessor* to the feature matrix.

    Parameters:
        preprocessor: Fitted transformer exposing ``transform``.
        X_test: Raw feature matrix to transform.

    Returns:
        The transformed feature matrix.
    """
    return preprocessor.transform(X_test)
|
28 |
-
|
29 |
-
|
30 |
-
def predict_in_batches(loaded_model, X_test_processed, y_test, batch_size):
    """Evaluate the model batch-by-batch and report per-batch accuracy.

    "Accuracy" here is 1 - mean relative absolute error, so it can be
    negative when predictions are far off; assumes y values are non-zero.

    Parameters:
        loaded_model: Trained model exposing ``predict(X)``.
        X_test_processed (numpy.ndarray): Preprocessed feature matrix.
        y_test (pandas.Series): True targets aligned with the rows of X.
        batch_size (int): Number of rows per prediction batch.

    Returns:
        float: Sample-weighted average accuracy over all batches (the
        original computed this but only printed it; returning it makes the
        function usable programmatically and is backward-compatible).
    """
    n_samples = X_test_processed.shape[0]
    # Ceiling division so a final partial batch is still processed.
    num_batches = -(-n_samples // batch_size)
    total_accuracy = 0.0

    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min(start_index + batch_size, n_samples)
        batch_X = X_test_processed[start_index:end_index]
        batch_y = y_test.iloc[start_index:end_index]

        # Make predictions with the loaded final model
        batch_predictions = loaded_model.predict(batch_X)

        # 1 - mean relative error for this batch.
        batch_accuracy = 1 - np.mean(np.abs(batch_y.values - batch_predictions[:, 0]) / batch_y.values)
        # Weight by batch size so a short final batch doesn't skew the mean.
        total_accuracy += batch_accuracy * (end_index - start_index)

        print(f'Batch {batch_num + 1}/{num_batches} - Accuracy: {batch_accuracy:.2%}')

    average_accuracy = total_accuracy / n_samples
    print(f'Average Accuracy: {average_accuracy:.2%}')
    return average_accuracy
|
51 |
-
|
52 |
-
|
53 |
-
def main():
    """Run the full inference pipeline: load data and model, preprocess, predict."""
    data_path = '/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/datasets/preprocessed_data.csv'
    model_path = '/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/saved_models/nn_model.keras'
    preprocessor_path = '/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/saved_models/nn_preprocessor.pkl'

    df = load_data(data_path)
    loaded_model, preprocessor = load_model_and_preprocessor(model_path, preprocessor_path)

    # Feature columns the preprocessor was fitted on.
    selected_features = [
        'Origin_Airport', 'Destination_Airport', 'Operating_Airline', 'model', '_Manufacturer',
        'seats', 'distance', '_Operating_Airline_ASK_(Millions)', 'FLIGHT_ID', 'FFLOW_KGM',
        'J/T', 'CAT', 'dist', 'mean_taxi_in'
    ]

    # Select only the relevant features, then transform and evaluate.
    X_test, y_test = select_features(df, selected_features)
    X_test_processed = preprocess_data(preprocessor, X_test)
    predict_in_batches(loaded_model, X_test_processed, y_test, batch_size=32)
|
70 |
-
|
71 |
-
|
72 |
-
if __name__ == "__main__":
    import os

    # Set before TensorFlow initialises; presumably disables oneDNN
    # fused-op optimisations to keep numerics consistent — TODO confirm.
    os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neural_network/model.py
DELETED
@@ -1,118 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
from tensorflow.keras.models import Sequential
|
4 |
-
from tensorflow.keras.layers import Dense, Dropout, Input
|
5 |
-
from tensorflow.keras.callbacks import EarlyStopping
|
6 |
-
from sklearn.model_selection import train_test_split
|
7 |
-
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
8 |
-
from sklearn.compose import ColumnTransformer
|
9 |
-
from sklearn.pipeline import Pipeline
|
10 |
-
import joblib
|
11 |
-
|
12 |
-
|
13 |
-
def load_data(file_path):
    """Read the CSV dataset at *file_path* into a DataFrame."""
    data = pd.read_csv(file_path)
    return data
|
15 |
-
|
16 |
-
|
17 |
-
def preprocess_data(data, selected_features, categorical_features, numerical_features):
    """Split *data* into train/test sets and fit preprocessing on the train split.

    Numerical columns are standard-scaled; categorical columns are one-hot
    encoded, with unknown categories at transform time ignored.

    Parameters:
        data (pandas.DataFrame): Full dataset containing 'fuel_burn_total'.
        selected_features (list[str]): Columns used as model inputs.
        categorical_features (list[str]): Subset of features to one-hot encode.
        numerical_features (list[str]): Subset of features to standard-scale.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, preprocessor)`` where the
        feature matrices are already transformed and *preprocessor* is the
        fitted ColumnTransformer.
    """
    # Combined preprocessing: scale numerics, one-hot encode categoricals.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', Pipeline(steps=[('scaler', StandardScaler())]), numerical_features),
            ('cat', Pipeline(steps=[('encoder', OneHotEncoder(handle_unknown='ignore'))]), categorical_features)
        ])

    # Split the dataset before fitting so the test split stays unseen.
    X_train, X_test, y_train, y_test = train_test_split(
        data[selected_features], data['fuel_burn_total'],
        test_size=0.2, random_state=42)

    # Fit scaling/encoding on the training split only, then apply to test.
    X_train = preprocessor.fit_transform(X_train)
    X_test = preprocessor.transform(X_test)

    return X_train, X_test, y_train, y_test, preprocessor
|
43 |
-
|
44 |
-
|
45 |
-
def build_model(input_shape):
    """Build and compile a small feed-forward regression network.

    Parameters:
        input_shape (int): Number of input features.

    Returns:
        tensorflow.keras.Model: Compiled model (Adam optimiser, MSE loss,
        MAE metric) with two 64-unit ReLU layers and a single linear output.
    """
    model = Sequential()
    model.add(Input(shape=(input_shape,)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return model
|
54 |
-
|
55 |
-
|
56 |
-
def train_model(model, X_train, y_train, epochs=50, batch_size=32, patience=10, validation_split=0.2):
    """Fit *model* on the training data with early stopping.

    Parameters:
        model (tensorflow.keras.Model): Model to train.
        X_train (numpy.ndarray): Training features.
        y_train (numpy.ndarray): Training targets.
        epochs (int, optional): Maximum number of training epochs. Default 50.
        batch_size (int, optional): Samples per gradient update. Default 32.
        patience (int, optional): Epochs without val_loss improvement before
            training stops. Default 10.
        validation_split (float, optional): Fraction of training data held out
            for validation. Default 0.2.

    Returns:
        tuple: ``(model, history)`` — the trained model and the Keras History
        record of loss/metric values per epoch.
    """
    # Stop once validation loss plateaus and roll back to the best weights.
    stopper = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
    history = model.fit(
        X_train,
        y_train,
        validation_split=validation_split,
        epochs=epochs,
        callbacks=[stopper],
        batch_size=batch_size,
    )
    return model, history
|
78 |
-
|
79 |
-
|
80 |
-
def evaluate_model(model, X_test, y_test):
    """Compute MAE and RMSE of *model* predictions on the test set.

    Parameters:
        model: Trained model exposing ``predict(X)``.
        X_test (numpy.ndarray): Test features.
        y_test (numpy.ndarray): True test targets.

    Returns:
        tuple: ``(mae, rmse)`` as floats.
    """
    errors = y_test - model.predict(X_test).flatten()
    mae = np.mean(np.abs(errors))
    rmse = np.sqrt(np.mean(errors ** 2))
    return mae, rmse
|
85 |
-
|
86 |
-
|
87 |
-
def save_model(model, preprocessor, model_path, preprocessor_path):
    """Persist the trained model (Keras format) and its fitted preprocessor (joblib).

    Parameters:
        model (tensorflow.keras.Model): Trained model to save.
        preprocessor: Fitted transformer to pickle alongside the model.
        model_path (str): Destination for the ``.keras`` model file.
        preprocessor_path (str): Destination for the joblib pickle.
    """
    joblib.dump(preprocessor, preprocessor_path)
    model.save(model_path)
|
90 |
-
|
91 |
-
|
92 |
-
def main():
    """Train, evaluate, and persist the fuel-burn regression network."""
    data = load_data('../../datasets/preprocessed_data.csv')

    selected_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'seats', 'distance', 'J/T',
                         'CAT', 'dist']
    categorical_features = ['Origin_Airport', 'Destination_Airport', 'model', '_Manufacturer', 'J/T', 'CAT']
    numerical_features = ['seats', 'distance', 'dist']

    X_train, X_test, y_train, y_test, preprocessor = preprocess_data(
        data, selected_features, categorical_features, numerical_features)

    # Build, train, and score the network.
    network = build_model(X_train.shape[1])
    network, history = train_model(network, X_train, y_train)

    mae, rmse = evaluate_model(network, X_test, y_test)
    print(f'MAE: {mae}')
    print(f'RMSE: {rmse}')

    save_model(network, preprocessor, '../../saved_models/nn_model.keras', '../../saved_models/nn_preprocessor.pkl')
|
112 |
-
|
113 |
-
|
114 |
-
if __name__ == "__main__":
    import os

    # Set before TensorFlow initialises; presumably disables oneDNN
    # fused-op optimisations to keep numerics consistent — TODO confirm.
    os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neural_network/neural_network.py
DELETED
@@ -1,161 +0,0 @@
|
|
1 |
-
# # -*- coding: utf-8 -*-
|
2 |
-
# """neural_network.ipynb
|
3 |
-
|
4 |
-
# Automatically generated by Colab.
|
5 |
-
|
6 |
-
# Original file is located at
|
7 |
-
# https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
|
8 |
-
# """
|
9 |
-
|
10 |
-
# import pandas as pd
|
11 |
-
# import requests
|
12 |
-
# import numpy as np
|
13 |
-
# from sklearn.model_selection import train_test_split
|
14 |
-
# from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
|
15 |
-
# from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
16 |
-
# from tensorflow.keras.models import Sequential
|
17 |
-
# from tensorflow.keras.layers import Dense
|
18 |
-
# from scipy.stats import f
|
19 |
-
|
20 |
-
# # Neural Network model
|
21 |
-
# # Note here I am using a new dataset which Abdulelah shared with me.
|
22 |
-
# # dataa filename "preprocessed_data.csv"
|
23 |
-
|
24 |
-
# dataset = pd.read_csv('/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/datasets/preprocessed_data.csv')
|
25 |
-
# # using dataset Abdulelah gave me
|
26 |
-
|
27 |
-
# dataset.dropna(inplace = True)
|
28 |
-
# dataset.head()
|
29 |
-
|
30 |
-
|
31 |
-
# feature = dataset[['distance', 'model', 'seats', 'fuel_burn', 'fuel_burn_total']]
|
32 |
-
# target = dataset['fuel_burn_total']
|
33 |
-
|
34 |
-
# feature = feature.copy()
|
35 |
-
# feature.drop('model', axis=1, inplace=True)
|
36 |
-
|
37 |
-
# # doing encoding
|
38 |
-
# encoder = OneHotEncoder(sparse_output = False)
|
39 |
-
# feature_encoded = pd.DataFrame(encoder.fit_transform(feature[['model']]))
|
40 |
-
# feature_encoded.columns = encoder.get_feature_names_out(['model'])
|
41 |
-
# feature.drop('model', axis = 1, inplace = True)
|
42 |
-
# feature = pd.concat([feature.reset_index(drop = True), feature_encoded.reset_index(drop = True)], axis = 1)
|
43 |
-
|
44 |
-
|
45 |
-
# feature_train, feature_test, target_train, target_test = train_test_split(feature, target, test_size = 0.1, random_state = 42) # split into train and test
|
46 |
-
# scaler = StandardScaler()
|
47 |
-
# feature_train_scaled = scaler.fit_transform(feature_train)
|
48 |
-
# feature_test_scaled = scaler.transform(feature_test)
|
49 |
-
|
50 |
-
# # building the model
|
51 |
-
# model = Sequential([
|
52 |
-
# Dense(64, activation = 'relu', input_shape = (feature_train_scaled.shape[1],)),
|
53 |
-
# Dense(64, activation = 'relu'),
|
54 |
-
# Dense(1)]) # can change dense
|
55 |
-
# model.compile(optimizer = 'adam', loss = 'mean_squared_error') # compiling model
|
56 |
-
# model.fit(feature_train_scaled, target_train, epochs = 50, batch_size = 32, verbose = 1) # training model
|
57 |
-
|
58 |
-
# mse = model.evaluate(feature_test_scaled, target_test)
|
59 |
-
# print("mean squared e", mse)
|
60 |
-
|
61 |
-
|
62 |
-
# target_prediction = model.predict(feature_test_scaled)
|
63 |
-
# r2 = r2_score(target_test, target_prediction)
|
64 |
-
# mae = mean_absolute_error(target_test, target_prediction)
|
65 |
-
# mse = mean_squared_error(target_test, target_prediction)
|
66 |
-
|
67 |
-
# feature_we_want = len(target) # what we are looking for
|
68 |
-
# regression = 1 # there is only one predictor
|
69 |
-
# residual = feature_we_want - 2
|
70 |
-
# explained_variance = r2 * np.sum((target - np.mean(target))**2)
|
71 |
-
# unexplained_variance = mse * feature_we_want
|
72 |
-
|
73 |
-
# F_value = (explained_variance / regression) / (unexplained_variance / residual) # calculating the F statistic for the report purposes
|
74 |
-
# p_value = 1 - f.cdf(F_value, regression, residual)
|
75 |
-
# rse = np.sqrt(mse)
|
76 |
-
|
77 |
-
# print(f"Rquared {r2}")
|
78 |
-
# print(f"mean absolute e {mae}")
|
79 |
-
# print(f"mean squared e {mse}")
|
80 |
-
# print(f"regression: {regression:.4f}")
|
81 |
-
# print(f"residual: {residual:.4f}")
|
82 |
-
# print(f"p-value: {p_value:.4f}") # calculating P value for the report
|
83 |
-
# print(f"standard error: {rse:.2f}")
|
84 |
-
# print(f"f-statistic: {F_value:.2f}")
|
85 |
-
# # the mse difference between the predicted and actual fuel burn totals on the model is around 4.97, it it was lower it would be better
|
86 |
-
|
87 |
-
# # mse is 0 now this is a good model !
|
88 |
-
|
89 |
-
import pandas as pd
|
90 |
-
import numpy as np
|
91 |
-
from sklearn.model_selection import train_test_split
|
92 |
-
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
|
93 |
-
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
94 |
-
from tensorflow.keras.models import Sequential
|
95 |
-
from tensorflow.keras.layers import Dense
|
96 |
-
from scipy.stats import f
|
97 |
-
|
98 |
-
# Load the dataset
dataset = pd.read_csv('/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/datasets/preprocessed_data.csv')
dataset.dropna(inplace=True)

# Features and target.
# BUG FIX: the original feature list included 'fuel_burn_total' — the target
# itself — leaking the answer into the inputs (hence the legacy "mse is 0"
# note). It is excluded here so the model is evaluated honestly.
# NOTE(review): 'fuel_burn' may also be trivially related to the target —
# confirm with the dataset owner whether it should remain a feature.
features = dataset[['distance', 'model', 'seats', 'fuel_burn']]
target = dataset['fuel_burn_total']

# Encoding the 'model' column
encoder = OneHotEncoder(sparse_output=False)
model_encoded = pd.DataFrame(encoder.fit_transform(features[['model']]))
model_encoded.columns = encoder.get_feature_names_out(['model'])

# Drop the original 'model' column and add the encoded data
features = features.drop('model', axis=1)
features = pd.concat([features.reset_index(drop=True), model_encoded.reset_index(drop=True)], axis=1)

# Train-test split
feature_train, feature_test, target_train, target_test = train_test_split(features, target, test_size=0.1, random_state=42)

# Feature scaling (fit on the training split only to avoid test leakage)
scaler = StandardScaler()
feature_train_scaled = scaler.fit_transform(feature_train)
feature_test_scaled = scaler.transform(feature_test)

# Neural network model: two 64-unit ReLU layers, one linear output.
model = Sequential([
    Dense(64, activation='relu', input_shape=(feature_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1)
])

# Compile and train the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(feature_train_scaled, target_train, epochs=50, batch_size=32, verbose=1)

# Evaluate the model
mse = model.evaluate(feature_test_scaled, target_test)
print("Mean Squared Error:", mse)

# Predictions and performance metrics
target_prediction = model.predict(feature_test_scaled)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)
mse = mean_squared_error(target_test, target_prediction)

# Calculate F-statistic and p-value (for reporting purposes)
n_samples = len(target)
n_predictors = feature_train_scaled.shape[1]
residual = n_samples - n_predictors - 1
explained_variance = r2 * np.sum((target - np.mean(target))**2)
unexplained_variance = mse * n_samples

F_value = (explained_variance / n_predictors) / (unexplained_variance / residual)
p_value = 1 - f.cdf(F_value, n_predictors, residual)
rse = np.sqrt(mse)

# Print the results
print(f"R-squared: {r2}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"p-value: {p_value:.4f}")
print(f"Root Squared Error: {rse:.2f}")
print(f"F-statistic: {F_value:.2f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|