Spaces:
Runtime error
Runtime error
File size: 6,470 Bytes
f637442 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# -*- coding: utf-8 -*-
"""linear_regression.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
"""
import os

import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from scipy.stats import f
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
# The dataset comes from the FliteRoute endpoint on RapidAPI
# (fuel/time estimate for a GVA -> MIA route flown by an A320).
#
# NOTE(review): a live API key is committed in this file. Rotate it and supply
# the replacement through the RAPIDAPI_KEY environment variable; the literal is
# kept only as a fallback so existing runs keep working.
api_key = os.environ.get('RAPIDAPI_KEY') or '93844a03b8msh43e83be923422abp10fb67jsne048c3017988'
url = 'https://fliteroute.p.rapidapi.com/api/gcfuelandtime/origin/GVA/dest/MIA/model/A320'
headers = {'x-rapidapi-host': 'fliteroute.p.rapidapi.com', 'x-rapidapi-key': api_key}

# A timeout keeps the script from hanging forever if the service never answers.
response = requests.get(url, headers=headers, timeout=30)

if response.status_code == 200:
    data = response.json()
    print(data)
else:
    # Print the plain values (the previous braces built one-element sets) and
    # stop: every step below needs `data`, so continuing would only fail later
    # with a less helpful error.
    print(response.status_code, response.text)
    raise RuntimeError(
        f"FliteRoute request failed with status {response.status_code}"
    )
# Simple linear regression: predict per-segment fuel from per-segment distance.
# Only the "fuel" and "dist_nm" properties are used; climbData and descendData
# are skipped because each of them has a single key entry.
# `data` is the JSON payload parsed from the API response above.
segment_properties = [segment['properties'] for segment in data['features']]

# scikit-learn expects 2-D inputs, hence the (n, 1) column shape.
fuel = np.array([float(props['fuel']) for props in segment_properties]).reshape(-1, 1)
distance = np.array([float(props['dist_nm']) for props in segment_properties]).reshape(-1, 1)

model = LinearRegression()
model.fit(distance, fuel)
predicted_fuel = model.predict(distance)  # in-sample predictions

# Model metrics, computed on the same data the model was fitted on.
mse = mean_squared_error(fuel, predicted_fuel)
r2 = r2_score(fuel, predicted_fuel)

future_distance_nm = 30.90  # change this value to predict for another distance
predicted_fuel_future = model.predict([[future_distance_nm]])

# Overall F-test of the regression, for reporting purposes.
feature_we_want = len(fuel)  # number of observations
regression = 1  # regression degrees of freedom: a single predictor
residual = feature_we_want - 2  # residual degrees of freedom: n - predictors - 1
explained_variance = r2 * np.sum((fuel - np.mean(fuel))**2)  # regression sum of squares
unexplained_variance = mse * feature_we_want  # residual sum of squares
if residual > 0:
    F_value = (explained_variance / regression) / (unexplained_variance / residual)
    # The survival function is 1 - cdf, evaluated without the precision loss
    # that the explicit subtraction suffers when the p-value is tiny.
    p_value = f.sf(F_value, regression, residual)
else:
    # With two or fewer observations there are no residual degrees of freedom,
    # so the test is undefined; report NaN instead of dividing by zero.
    F_value = float('nan')
    p_value = float('nan')

# Square root of the MSE; no degrees-of-freedom correction is applied.
rse = np.sqrt(mse)
mean_distance = np.mean(distance)
se_coefficient = rse / np.sqrt(np.sum((distance - mean_distance)**2))

print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"r^2 score: {r2:.2f}")
# coef_ holds the fitted slope (fuel per nautical mile), not an average.
print(f"fuel per nm (slope): {model.coef_[0][0]:.2f}")
print(f"mean squared error: {mse:.2f}")
print(f"f-statistic: {F_value:.2f}")
print(f"standard error: {rse:.2f}")
print(f"predicted fuel needed for a {future_distance_nm} nm flight: {predicted_fuel_future[0][0]:.2f} kg")
# A more in-depth linear regression model: several segment properties are used
# to predict cruise fuel, and the model is scored on a held-out split.
features = [feature['properties'] for feature in data['features']]
df = pd.DataFrame(features)
numeric_cols = ['dist_km', 'cruiseTime', 'fuel', 'CO2', 'dist_nm']  # Can add or take off features
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
# 'coerce' turns unparseable values into NaN, which LinearRegression rejects,
# so rows with a missing numeric value are dropped before fitting.
df = df.dropna(subset=numeric_cols)
df.rename(columns={'fuel': 'cruiseFuel'}, inplace=True)

# NOTE(review): CO2 is presumably computed directly from fuel burn; if so it
# leaks the target into the predictors and would explain a near-zero error.
# Confirm against the API documentation before trusting the scores.
feature_cols = ['dist_km', 'cruiseTime', 'CO2', 'dist_nm']  # Can add or take off features
features = df[feature_cols]
target = df['cruiseFuel']

# Hold out 10% of the rows for testing.
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=0.1, random_state=42
)

model = LinearRegression()
model.fit(features_train, target_train)
target_prediction = model.predict(features_test)

# Held-out metrics.
mse = mean_squared_error(target_test, target_prediction)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)

# One future flight, given in the same order as feature_cols:
# ['dist_km', 'cruiseTime', 'CO2', 'dist_nm']
future_distance_nm = [30.90, 40, 1894.34, 23.9]
# Predict from a DataFrame carrying the training column names so scikit-learn
# can match features by name instead of warning about missing feature names.
future_features = pd.DataFrame([future_distance_nm], columns=feature_cols)
predicted_fuel_future = model.predict(future_features)
future_dist_nm = future_distance_nm[feature_cols.index('dist_nm')]

# can change to "dist_km" to see the average in km
average_predicted_fuel_per_nm = (target_prediction / features_test['dist_nm']).mean()

# Overall F-test of the regression, for reporting purposes. It is computed on
# the training rows, because the test is defined for the sample the model was
# fitted on, with one regression degree of freedom per predictor.
train_prediction = model.predict(features_train)
feature_we_want = len(target_train)  # number of fitted observations
regression = features_train.shape[1]  # number of predictors
residual = feature_we_want - regression - 1
unexplained_variance = np.sum((target_train - train_prediction) ** 2)  # residual sum of squares
total_variance = np.sum((target_train - target_train.mean()) ** 2)
explained_variance = total_variance - unexplained_variance  # regression sum of squares
if residual > 0:
    F_value = (explained_variance / regression) / (unexplained_variance / residual)
    # Survival function = 1 - cdf, without the precision loss for tiny p-values.
    p_value = f.sf(F_value, regression, residual)
else:
    # Too few training rows for the number of predictors: the test is undefined.
    F_value = float('nan')
    p_value = float('nan')

# Square root of the held-out MSE.
rse = np.sqrt(mse)

print(f"mean squared error {mse:.2f}")
print(f"Rsquared {r2:.2f}")
print(f"mean absolute error {mae:.2f}")
print(f"average fuel consumption per nautical mile:: {average_predicted_fuel_per_nm:.2f} for LR model")
print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"standard error: {rse:.2f}")
print(f"f-statistic: {F_value:.2f}")
# Report only the nautical-mile distance here, not the whole feature list.
print(f"predicted fuel needed for a {future_dist_nm} nm flight: {predicted_fuel_future[0]:.2f} kg")
# mse is 26.97, which is low; this means that the model is performing well
# in the line mse = mean_squared_error(target_test, target_prediction), if you change target_test to features_test you will get the same mse
# R-squared is close to 1; this means the model is a good fit
# mae is 3.5; this explains why some numbers are a bit different, but the predicted values are close to the actual ones
# the mse went down to 0.0, so this is good!! but I'm a bit skeptical
# R-squared went up to 1, so the model is a good fit
# the mae went down to 0
# these results are for the above model
# Put the regression's `mse` in context by comparing it with the mean of the
# cruiseFuel column.
# NOTE(review): MSE is in squared fuel units, so this ratio is not unit-free;
# sqrt(mse) / mean would be the scale-free version. Confirm which is intended.
mean_cruise_fuel = df['cruiseFuel'].mean()
mse_to_mean_ratio = mse / mean_cruise_fuel

# A bare expression is only displayed inside a notebook cell; print the values
# so that they are also visible when this file runs as a script.
print(f"mean cruise fuel: {mean_cruise_fuel:.2f}")
print(f"mse to mean cruise fuel ratio: {mse_to_mean_ratio:.4f}")
# the number 0.0162% means that the mse is small compared to the mean_cruise_fuel; this is good: again, the predictions are
# close to the actual values
# numbers went down even more!!!