Spaces:

poudel
/

AircraftFuelPredictorV2

Runtime error

File size: 6,470 Bytes

f637442

# -*- coding: utf-8 -*-
"""linear_regression.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
"""

import os

import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from scipy.stats import f
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# The dataset comes from the Fliteroute API on RapidAPI.
# The key is read from the RAPIDAPI_KEY environment variable when it is set.
# NOTE(review): the literal fallback is a credential committed to source
# control; it is kept only so existing runs keep working and should be rotated
# and removed once RAPIDAPI_KEY is configured in the deployment environment.
api_key = os.environ.get('RAPIDAPI_KEY', '93844a03b8msh43e83be923422abp10fb67jsne048c3017988')
url = 'https://fliteroute.p.rapidapi.com/api/gcfuelandtime/origin/GVA/dest/MIA/model/A320'
headers = {'x-rapidapi-host': 'fliteroute.p.rapidapi.com', 'x-rapidapi-key': api_key}
# Without a timeout, requests waits indefinitely if the API never answers.
response = requests.get(url, headers=headers, timeout=30)
if response.status_code == 200:
    data = response.json()
    print(data)
else:
    # Report the HTTP status and body so a failed request can be diagnosed.
    print(f"request failed: {response.status_code} {response.text}")
# Note: climbData and descendData are not used, since there is only one key entry for each of them.

# Simple linear regression model.
# Two fields of every route segment are used: "dist_nm" is the single
# predictor and "fuel" is the target.
data = response.json()
segment_properties = [segment['properties'] for segment in data['features']]

# Build (n, 1) column vectors, the 2-D layout scikit-learn estimators expect.
fuel = np.array([float(props['fuel']) for props in segment_properties]).reshape(-1, 1)
distance = np.array([float(props['dist_nm']) for props in segment_properties]).reshape(-1, 1)

model = LinearRegression()
model.fit(distance, fuel)

# In-sample predictions: the model is evaluated on the data it was fitted on.
predicted_fuel = model.predict(distance)

# Model metrics.
mse = mean_squared_error(fuel, predicted_fuel)
r2 = r2_score(fuel, predicted_fuel)
future_distance_nm = 30.90  # distance (nm) to predict fuel for; change as needed
predicted_fuel_future = model.predict([[future_distance_nm]])

feature_we_want = len(fuel)  # number of observations
regression = 1  # regression degrees of freedom: there is only one predictor
residual = feature_we_want - 2  # residual degrees of freedom: n - predictors - 1
explained_variance = r2 * np.sum((fuel - np.mean(fuel))**2)  # regression sum of squares
unexplained_variance = mse * feature_we_want  # residual sum of squares

# F statistic for the report.
F_value = (explained_variance / regression) / (unexplained_variance / residual)
# sf() is the upper-tail probability; it keeps precision that 1 - cdf() loses
# when F is large. `f` is scipy.stats.f, imported at the top of the file.
p_value = f.sf(F_value, regression, residual)
# The residual standard error divides the residual sum of squares by its
# degrees of freedom (n - 2), not by n as sqrt(mse) would.
rse = np.sqrt(unexplained_variance / residual)

mean_distance = np.mean(distance)
# Standard error of the slope estimate.
se_coefficient = rse / np.sqrt(np.sum((distance - mean_distance)**2))

print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"r^2 score: {r2:.2f}")
# coef_ holds the fitted slope, i.e. the change in fuel per nautical mile.
print(f"fuel per nautical mile (slope): {model.coef_[0][0]:.2f}")
print(f"mean squared error: {mse:.2f}")
print(f"f-statistic: {F_value:.2f}")
print(f"standard error: {rse:.2f}")
print(f"predicted fuel needed for a {future_distance_nm} nm flight: {predicted_fuel_future[0][0]:.2f} kg")

# A more in-depth linear regression model, since the simple one gives good results.
# It uses several segment properties that contribute to the fuel needed for the flight.
# NOTE(review): CO2 is presumably derived directly from fuel burn; if so it leaks
# the target into the predictors and explains a near-perfect fit -- confirm.

records = [feature['properties'] for feature in data['features']]
df = pd.DataFrame(records)
numeric_cols = ['dist_km', 'cruiseTime', 'fuel', 'CO2', 'dist_nm']  # features can be added or removed
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
# errors='coerce' turns unparseable values into NaN, which LinearRegression
# rejects; drop those rows instead of failing at fit time.
df = df.dropna(subset=numeric_cols)

df.rename(columns={'fuel': 'cruiseFuel'}, inplace=True)
feature_cols = ['dist_km', 'cruiseTime', 'CO2', 'dist_nm']  # features can be added or removed
features = df[feature_cols]
target = df['cruiseFuel']
# Hold out 10% of the segments for testing; the fixed seed keeps the split reproducible.
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=0.1, random_state=42)

model = LinearRegression()
model.fit(features_train, target_train)
target_prediction = model.predict(features_test)

# Out-of-sample metrics on the held-out test split.
mse = mean_squared_error(target_test, target_prediction)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)

# Despite its name this holds one value per feature, in feature_cols order:
# ['dist_km', 'cruiseTime', 'CO2', 'dist_nm']. Change the values as needed.
future_distance_nm = [30.90, 40, 1894.34, 23.9]
# The model was fitted on a DataFrame, so predict with matching column names.
future_features = pd.DataFrame([future_distance_nm], columns=feature_cols)
predicted_fuel_future = model.predict(future_features)
# Switch 'dist_nm' to 'dist_km' to see the average per kilometre instead.
average_predicted_fuel_per_nm = (target_prediction / features_test['dist_nm']).mean()

# Overall F-test, computed in-sample on the training split the model was fitted on.
train_prediction = model.predict(features_train)
feature_we_want = len(target_train)  # number of observations used for fitting
regression = features_train.shape[1]  # regression degrees of freedom: one per predictor
residual = feature_we_want - regression - 1  # residual degrees of freedom
unexplained_variance = np.sum((target_train - train_prediction)**2)  # residual sum of squares
total_variance = np.sum((target_train - target_train.mean())**2)  # total sum of squares
explained_variance = total_variance - unexplained_variance  # regression sum of squares

F_value = (explained_variance / regression) / (unexplained_variance / residual)
# sf() is the upper-tail probability; it keeps precision that 1 - cdf() loses
# when F is large. `f` is scipy.stats.f, imported at the top of the file.
p_value = f.sf(F_value, regression, residual)
# Residual standard error of the fit (residual sum of squares over its degrees of freedom).
rse = np.sqrt(unexplained_variance / residual)

print(f"mean squared error {mse:.2f}")
print(f"Rsquared {r2:.2f}")
print(f"mean absolute error {mae:.2f}")
print(f"average fuel consumption per nautical mile:: {average_predicted_fuel_per_nm:.2f} for LR model")
print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"standard error: {rse:.2f}")
print(f"f-statistic: {F_value:.2f}")
# Report only the dist_nm entry rather than the whole feature list.
future_nm = future_distance_nm[feature_cols.index('dist_nm')]
print(f"predicted fuel needed for a {future_nm} nm flight: {predicted_fuel_future[0]:.2f} kg")
# MSE is 26.97, which is low; this means the model is performing well.
# In the line mse = mean_squared_error(target_test, target_prediction), if you change target_test to features_test you will get the same MSE.
# R-squared is close to 1, which means the model is a good fit.
# MAE is 3.5; this explains why some numbers are a bit different, but the predicted values are close to the actual ones.

# The MSE went down to 0.0, so this is good! But I'm a bit skeptical of it.
# R-squared went up to 1, so the model is a good fit.
# The MAE went down to 0.

# These results are for the model above.
mean_cruise_fuel = df['cruiseFuel'].mean()  # mean of the cruiseFuel values
mse_to_mean_ratio = mse / mean_cruise_fuel  # ratio of the MSE to the mean cruiseFuel
# NOTE(review): mse is in squared fuel units while the mean is not, so this
# ratio depends on the unit scale; RMSE / mean may be the intended comparison -- confirm.
# A bare expression is only displayed in a notebook cell; print the values so
# they also appear when the file runs as a script.
print(f"mean cruise fuel: {mean_cruise_fuel:.2f}")
print(f"mse to mean cruise fuel ratio: {mse_to_mean_ratio:.6f}")

# The value 0.0162% means that the MSE is small compared to mean_cruise_fuel. This is good: again, the predictions are
# close to the actual values.

# numbers went down even more!!!