# NOTE: the original export began with stray "Spaces:" / "Runtime error"
# paste markers (non-Python text). The runtime error was a NameError on
# `f` (scipy.stats.f was never imported); see the import block below.
# -*- coding: utf-8 -*-
"""decision_tree_regressor.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
"""
import numpy as np
import pandas as pd
import requests
from scipy.stats import f
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.tree import DecisionTreeRegressor
# Dataset: RapidAPI "fliteroute" great-circle fuel/time estimate (GVA -> MIA, A320).
# NOTE(security): the API key is hard-coded in source; move it to an environment
# variable (e.g. os.environ["RAPIDAPI_KEY"]) before sharing this script.
api_key = '93844a03b8msh43e83be923422abp10fb67jsne048c3017988'
url = 'https://fliteroute.p.rapidapi.com/api/gcfuelandtime/origin/GVA/dest/MIA/model/A320'
headers = {'x-rapidapi-host': 'fliteroute.p.rapidapi.com', 'x-rapidapi-key': api_key}
response = requests.get(url, headers=headers)
if response.status_code == 200:
    data = response.json()
    print(data)
else:
    # Original printed two singleton sets ({status}, {text}) and then fell through,
    # so the rest of the script crashed on an undefined `data`. Fail fast instead.
    raise RuntimeError(f"API request failed: {response.status_code} {response.text}")
# Note climbData and descendData is not being used since there is only one key entry for both features | |
# Decision Tree Regressor | |
features = [] # taking out features | |
for flight in data["features"]: | |
properties = flight["properties"] | |
geometry = flight["geometry"]["coordinates"] | |
distance_km = float(properties["dist_km"]) | |
cruise_time = int(properties["cruiseTime"]) | |
fuel = float(properties["fuel"]) | |
CO2 = float(properties["CO2"]) | |
features.append([distance_km, cruise_time, CO2, fuel]) | |
df = pd.DataFrame(features, columns = ["distance_km", "cruise_time", "CO2", "fuel"]) # converting to data frame | |
feature = df.drop("fuel", axis = 1) | |
target = df["fuel"] | |
feature_train, feature_test, target_train, target_test = train_test_split(df.drop("fuel", axis=1), df["fuel"], test_size=0.1, random_state=42) | |
# Fit the decision tree and evaluate on the held-out test split.
# Hyperparameters can be tuned; with min_samples_leaf=50 on a tiny dataset the
# tree degenerates to a single leaf (mean predictor) — likely why metrics are bad.
regression_tree = DecisionTreeRegressor(max_depth=100, min_samples_leaf=50, random_state=42)
regression_tree.fit(feature_train, target_train)
target_prediction = regression_tree.predict(feature_test)  # making the predictions

mse = mean_squared_error(target_test, target_prediction)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)

# F-statistic for the report, computed consistently on the TEST set.
# (Original mixed len(full target) with test-set MSE, and computed the "total
# sum of squares" from the scalar loop leftover `fuel`, which is always 0.)
n = len(target_test)
regression = 1       # regression degrees of freedom (treated as one predictor)
residual = n - 2     # residual degrees of freedom; NOTE(review): non-positive
                     # if the test split has <= 2 rows — confirm dataset size
tss = np.sum((target_test - np.mean(target_test)) ** 2)  # total sum of squares
explained_variance = r2 * tss    # SSR
unexplained_variance = mse * n   # SSE
F_value = (explained_variance / regression) / (unexplained_variance / residual)
p_value = 1 - f.cdf(F_value, regression, residual)  # scipy.stats.f
rse = np.sqrt(mse)  # root mean squared error ("standard error" in the report)

print(f"mean squared e {mse}")
print(f"Rsquared {r2}")
print(f"mean absolute error {mae}")
print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"standard error: {rse:.2f}")
print(f"f-statistic: {F_value:.2f}")
# Very high mse and mae
# Played with hyperparameters need to learn a bit more regarding some of them
# metrics still high this is a bad model