In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scipy.stats import f

# Load the dataset
dataset = pd.read_csv('/Users/ashishpoudel/Downloads/AircraftFuelPrediction-main/datasets/preprocessed_data.csv')
dataset.dropna(inplace=True)

In [2]:
# Features and target
features = dataset[['distance', 'model', 'seats', 'fuel_burn', 'fuel_burn_total']]
target = dataset['fuel_burn_total']

# Encoding the 'model' column
encoder = OneHotEncoder(sparse_output=False)
model_encoded = pd.DataFrame(encoder.fit_transform(features[['model']]))
model_encoded.columns = encoder.get_feature_names_out(['model'])

# Drop the original 'model' column and add the encoded data
features = features.drop('model', axis=1)
features = pd.concat([features.reset_index(drop=True), model_encoded.reset_index(drop=True)], axis=1)

# Train-test split
feature_train, feature_test, target_train, target_test = train_test_split(features, target, test_size=0.1, random_state=42)

# Feature scaling
scaler = StandardScaler()
feature_train_scaled = scaler.fit_transform(feature_train)
feature_test_scaled = scaler.transform(feature_test)

In [7]:
# Neural network model
model = Sequential([
 Dense(64, activation='relu', input_shape=(feature_train_scaled.shape[1],)),
 Dense(64, activation='relu'),
 Dense(1)
])

# Compile and train the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(feature_train_scaled, target_train, epochs=50, batch_size=32, verbose=1)

# Evaluate the model
mse = model.evaluate(feature_test_scaled, target_test)
print("Mean Squared Error:", mse)

# Predictions and performance metrics
target_prediction = model.predict(feature_test_scaled)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)
mse = mean_squared_error(target_test, target_prediction)

Epoch 1/50


 super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 140.5811
Epoch 2/50
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 1.9729
Epoch 3/50
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 0.7662
Epoch 4/50
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - loss: 0.8330
Epoch 5/50
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 0.7197
Epoch 6/50
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 0.7294
Epoch 7/50
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 0.6337
Epoch 8/50
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - loss: 0.4558
Epoch 9/50
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - loss: 0.3461
Epoch 10/50
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6m

In [6]:
# Calculate F-statistic and p-value 
n_samples = len(target)
n_predictors = feature_train_scaled.shape[1]
residual = n_samples - n_predictors - 1
explained_variance = r2 * np.sum((target - np.mean(target))**2)
unexplained_variance = mse * n_samples

F_value = (explained_variance / n_predictors) / (unexplained_variance / residual)
p_value = 1 - f.cdf(F_value, n_predictors, residual)
rse = np.sqrt(mse)

# Print the results
print(f"R-squared: {r2}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"p-value: {p_value:.4f}")
print(f"Root Squared Error: {rse:.2f}")
print(f"F-statistic: {F_value:.2f}")

R-squared: 0.9780861666108605
Mean Absolute Error: 0.7006260730692777
Mean Squared Error: 2.554603752569432
p-value: 0.0000
Root Squared Error: 1.60
F-statistic: 24052.88
