File size: 3,269 Bytes
f637442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -*- coding: utf-8 -*-
"""decision_tree_regressor.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
"""

import numpy as np
import pandas as pd
import requests
from scipy.stats import f
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.tree import DecisionTreeRegressor

# The dataset comes from RapidAPI's FliteRoute service: great-circle fuel/time
# estimates for a GVA -> MIA flight on an A320.
# SECURITY NOTE(review): the API key is hard-coded in source; move it to an
# environment variable (e.g. os.environ["RAPIDAPI_KEY"]) before sharing.
api_key = '93844a03b8msh43e83be923422abp10fb67jsne048c3017988'
url = 'https://fliteroute.p.rapidapi.com/api/gcfuelandtime/origin/GVA/dest/MIA/model/A320'
headers = {'x-rapidapi-host': 'fliteroute.p.rapidapi.com', 'x-rapidapi-key': api_key}
response = requests.get(url, headers=headers, timeout=30)  # timeout so a dead endpoint can't hang the run
if response.status_code == 200:
    data = response.json()
    print(data)
else:
    # Bug fix: the original `print({...}, {...})` printed *set* literals, and the
    # script then continued without `data`, crashing later with a NameError.
    # Fail fast with a readable message instead.
    raise RuntimeError(f"API request failed: {response.status_code} {response.text}")
# Note: climbData and descendData are not used since there is only one key
# entry for both features.

# Decision Tree Regressor

# Extract one row per flight segment from the API response.
# (The unused `geometry` lookup from the original was removed.)
features = []
for flight in data["features"]:
    properties = flight["properties"]
    features.append([
        float(properties["dist_km"]),
        int(properties["cruiseTime"]),
        float(properties["CO2"]),
        float(properties["fuel"]),
    ])

# Column order must match the append order above.
df = pd.DataFrame(features, columns=["distance_km", "cruise_time", "CO2", "fuel"])
feature = df.drop("fuel", axis=1)   # predictors: distance, cruise time, CO2
target = df["fuel"]                 # regression target

# Consistency fix: reuse `feature`/`target` instead of re-deriving them from
# `df`, so the split always matches the variables defined above.
feature_train, feature_test, target_train, target_test = train_test_split(
    feature, target, test_size=0.1, random_state=42
)

# Fit a depth-limited decision tree on the training split.
# NOTE(review): min_samples_leaf=50 on a small dataset likely collapses the
# tree to a single leaf (predicting the mean) — confirm with
# regression_tree.get_n_leaves() before trusting the metrics.
regression_tree = DecisionTreeRegressor(
    max_depth=100,
    min_samples_leaf=50,
    random_state=42,
)
regression_tree.fit(feature_train, target_train)

# Predict on the held-out split and compute the standard regression metrics.
target_prediction = regression_tree.predict(feature_test)
mse = mean_squared_error(target_test, target_prediction)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)



# Manual F-test for overall regression significance (report statistics).
# Names kept from the original so the print block below still works.
feature_we_want = len(target_test)           # n: held-out observations, matching mse/r2 above
regression = feature_test.shape[1]           # df_model: number of predictors (3, not 1 as before)
residual = feature_we_want - regression - 1  # df_residual = n - p - 1

# Bug fix: the original computed the total sum of squares from `fuel`, a
# scalar left over from the parsing loop, which made the explained variance
# identically zero. Use the test targets instead.
total_ss = np.sum((target_test - np.mean(target_test)) ** 2)
explained_variance = r2 * total_ss                 # SSR = R^2 * TSS
unexplained_variance = mse * feature_we_want       # SSE (mse is SSE / n)

# NOTE(review): with test_size=0.1 the test set may be tiny, making
# `residual` <= 0 and the F-test meaningless — confirm the sample size.
F_value = (explained_variance / regression) / (unexplained_variance / residual)
p_value = 1 - f.cdf(F_value, regression, residual)  # `f` is scipy.stats.f (was a NameError before)
rse = np.sqrt(mse)


# Report the evaluation metrics and F-test results.
print(f"mean squared error {mse}")  # label fix: was truncated to "mean squared e"
print(f"Rsquared {r2}")
print(f"mean absolute error {mae}")
print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"standard error: {rse:.2f}")
print(f"f-statistic: {F_value:.2f}")


# Observations: MSE and MAE are very high; tuning the hyperparameters did not
# help much — this model underperforms and needs rework (or more data).