File size: 6,470 Bytes
f637442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# -*- coding: utf-8 -*-
"""linear_regression.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
"""

import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from scipy.stats import f
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# The dataset comes from the fliteroute API on RapidAPI: fuel-and-time data for
# the GVA -> MIA route flown with an A320 (see the URL below).
# NOTE(review): the API key is committed in plain text -- rotate it and load it
# from configuration or the environment instead of keeping it in source control.
api_key = '93844a03b8msh43e83be923422abp10fb67jsne048c3017988'
url = 'https://fliteroute.p.rapidapi.com/api/gcfuelandtime/origin/GVA/dest/MIA/model/A320'
headers = {'x-rapidapi-host': 'fliteroute.p.rapidapi.com', 'x-rapidapi-key': api_key}
# A timeout keeps the script from hanging forever if the service never answers.
response = requests.get(url, headers=headers, timeout=30)
if response.status_code == 200:
    data = response.json()
    print(data)
else:
    # Report the failure as readable text; wrapping each value in braces without
    # an f-string prefix printed two one-element sets instead.
    print(f"{response.status_code}, {response.text}")
# Note: climbData and descendData are not used because each of them has only a single entry.

# Simple linear regression model.
# Two fields of every route segment are used: "fuel" (target) and "dist_nm" (predictor).
data = response.json()
segments = data['features']

# Build (n, 1) column vectors, the 2-D layout scikit-learn expects.
fuel = np.array(
    [float(segment['properties']['fuel']) for segment in segments]
).reshape(-1, 1)
distance = np.array(
    [float(segment['properties']['dist_nm']) for segment in segments]
).reshape(-1, 1)

model = LinearRegression()
model.fit(distance, fuel)  # fit fuel as a linear function of distance

predicted_fuel = model.predict(distance)  # in-sample predictions

# Model metrics, computed on the same data the model was fitted on.
mse = mean_squared_error(fuel, predicted_fuel)
r2 = r2_score(fuel, predicted_fuel)
future_distance_nm = 30.90  # change this value to predict for another distance
predicted_fuel_future = model.predict([[future_distance_nm]])

# Overall F-test of the regression.
feature_we_want = len(fuel)  # number of observations
regression = 1  # regression degrees of freedom: a single predictor
residual = feature_we_want - regression - 1  # residual degrees of freedom
explained_variance = r2 * np.sum((fuel - np.mean(fuel))**2)  # regression sum of squares
unexplained_variance = mse * feature_we_want  # residual sum of squares

F_value = (explained_variance / regression) / (unexplained_variance / residual)
# sf() is the upper-tail probability, i.e. 1 - cdf, without the cancellation
# error that 1 - cdf() suffers for very small p-values.
p_value = f.sf(F_value, regression, residual)
# Residual standard error divides the residual sum of squares by its degrees of
# freedom (n - 2), not by n; this is the estimate the slope standard error needs.
rse = np.sqrt(unexplained_variance / residual)

mean_distance = np.mean(distance)
se_coefficient = rse / np.sqrt(np.sum((distance - mean_distance)**2))  # standard error of the slope

print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"r^2 score: {r2:.2f}")
# model.coef_ holds the fitted slope (fuel per unit of dist_nm), not an average of fuel.
print(f"fuel per nautical mile (slope): {model.coef_[0][0]:.2f}")
print(f"mean squared error: {mse:.2f}")
print(f"f-statistic: {F_value:.2f}")
print(f"standard error: {rse:.2f}")
print(f"predicted fuel needed for a {future_distance_nm} nm flight: {predicted_fuel_future[0][0]:.2f} kg")

# A more in-depth version of the linear regression model, since it gives good results.
# More of the segment properties are used as predictors of the cruise fuel.
# NOTE(review): CO2 is presumably derived from the fuel burn; if so it leaks the
# target into the predictors and would explain a near-zero error -- confirm.

features = [feature['properties'] for feature in data['features']]  # one row per route segment
df = pd.DataFrame(features)
numeric_cols = ['dist_km', 'cruiseTime', 'fuel', 'CO2', 'dist_nm']  # features can be added or removed
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')  # unparsable values become NaN

df.rename(columns={'fuel': 'cruiseFuel'}, inplace=True)
features = df[['dist_km', 'cruiseTime', 'CO2', 'dist_nm']]  # features can be added or removed
target = df['cruiseFuel']
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=0.1, random_state=42
)

model = LinearRegression()
model.fit(features_train, target_train)
target_prediction = model.predict(features_test)  # predictions on the held-out rows

# Held-out (test set) metrics.
mse = mean_squared_error(target_test, target_prediction)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)

# Values for a hypothetical flight, in the order ['dist_km', 'cruiseTime', 'CO2', 'dist_nm'].
future_distance_nm = [30.90, 40, 1894.34, 23.9]
# Predict from a DataFrame carrying the training column names; a bare list drops
# the feature names the model was fitted with.
future_flight = pd.DataFrame([future_distance_nm], columns=features.columns)
predicted_fuel_future = model.predict(future_flight)
average_predicted_fuel_per_nm = (target_prediction / features_test['dist_nm']).mean()  # use "dist_km" for the per-km average

# Overall F-test of the regression, computed on the rows the model was fitted on
# so that the sums of squares and the degrees of freedom describe the same data.
feature_we_want = len(target_train)  # number of fitted observations
regression = features_train.shape[1]  # regression degrees of freedom: one per predictor
residual = feature_we_want - regression - 1  # residual degrees of freedom
train_prediction = model.predict(features_train)
total_variance = np.sum((target_train - target_train.mean())**2)  # total sum of squares
unexplained_variance = np.sum((target_train - train_prediction)**2)  # residual sum of squares
explained_variance = total_variance - unexplained_variance  # regression sum of squares

F_value = (explained_variance / regression) / (unexplained_variance / residual)
# sf() is the upper-tail probability, i.e. 1 - cdf, without the cancellation
# error that 1 - cdf() suffers for very small p-values.
p_value = f.sf(F_value, regression, residual)
rse = np.sqrt(unexplained_variance / residual)  # residual standard error of the fit

print(f"mean squared error {mse:.2f}")
print(f"Rsquared {r2:.2f}")
print(f"mean absolute error {mae:.2f}")
print(f"average fuel consumption per nautical mile:: {average_predicted_fuel_per_nm:.2f} for LR model")
print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"standard error: {rse:.2f}")
print(f"f-statistic: {F_value:.2f}")
# Report only the dist_nm entry; the full list also holds dist_km, cruiseTime and CO2.
print(f"predicted fuel needed for a {future_flight['dist_nm'].iloc[0]} nm flight: {predicted_fuel_future[0]:.2f} kg")
# The MSE is 26.97, which is low; this means the model is performing well.
# In the line mse = mean_squared_error(target_test, target_prediction), if you change target_test to features_test you will get the same MSE.
# R-squared is close to 1, which means the model is a good fit.
# The MAE is 3.5; this explains why some numbers are a bit different, but the predicted values are close to the actual ones.

# The MSE went down to 0.0, so this is good, but I'm a bit skeptical.
# R-squared went up to 1, so the model is a good fit.
# The MAE went down to 0.

# These results are for the multiple-regression model above.
mean_cruise_fuel = df['cruiseFuel'].mean()  # mean of the cruiseFuel values
mse_to_mean_ratio = mse / mean_cruise_fuel  # ratio of the MSE to the mean cruiseFuel
# A bare expression is only displayed inside a notebook cell; print the values
# so they also appear when the file runs as a script.
print(f"mean cruise fuel: {mean_cruise_fuel:.2f}")
print(f"mse to mean cruise fuel ratio: {mse_to_mean_ratio:.4f}")

# The number 0.0162% means that the MSE is small compared to mean_cruise_fuel, which is good: again, the predictions are
# close to the actual values.

# numbers went down even more!!!