# NOTE: removed Colab export artifacts ("Spaces:" header and two "Runtime error"
# banners) that were pasted above the code; they were the notebook's crash output,
# caused by the undefined-name bugs fixed below, not part of the program.
# -*- coding: utf-8 -*-
"""gradient_boosting_regressor.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
"""
import os

import numpy as np
import pandas as pd
import requests
from scipy.stats import f
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.tree import DecisionTreeRegressor
# Fetch a great-circle fuel/time estimate (GVA -> MIA, Airbus A320) from the
# FliteRoute API on RapidAPI; the JSON response feeds the model below.
# SECURITY: a live API key was hard-coded here and has been committed to source.
# Prefer the RAPIDAPI_KEY environment variable and rotate the exposed key.
api_key = os.environ.get('RAPIDAPI_KEY', '93844a03b8msh43e83be923422abp10fb67jsne048c3017988')
url = 'https://fliteroute.p.rapidapi.com/api/gcfuelandtime/origin/GVA/dest/MIA/model/A320'
headers = {'x-rapidapi-host': 'fliteroute.p.rapidapi.com', 'x-rapidapi-key': api_key}
# timeout so a stalled connection cannot hang the script forever
response = requests.get(url, headers=headers, timeout=30)
if response.status_code == 200:
    data = response.json()
    print(data)
else:
    # NOTE(fix): the original printed one-element sets, e.g. "{404} {'...'}";
    # use an f-string so the failure is reported readably.
    print(f"request failed: {response.status_code} {response.text}")
# Gradient Boosting Regressor trained on the FliteRoute response fetched above.
# (climbData and descendData are ignored: each has only a single key entry.)
data = response.json()

# Each GeoJSON feature's "properties" dict carries the route metrics we model on.
route_records = [feature['properties'] for feature in data['features']]
df = pd.DataFrame(route_records)
#print(df.columns)

# Coerce the metric columns to numeric; unparsable strings become NaN and are dropped.
numeric_columns = ['dist_km', 'dist_nm', 'cruiseTime', 'fuel', 'CO2']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
df.dropna(inplace=True)

# NOTE(fix): the original feature list included 'fuel' itself, which is also the
# target — target leakage that made every metric below meaningless. The model
# now predicts fuel from route characteristics only.
feature_columns = ['dist_km', 'cruiseTime', 'dist_nm', 'CO2']
features = df[feature_columns]
target = df['fuel']

# 80/20 train/test split, seeded for reproducibility.
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=0.2, random_state=42)

# NOTE(fix): learning_rate=25 makes boosting diverge; 0.1 is the sklearn default
# and a sane starting point. Tune hyperparameters from here and watch the metrics.
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
                                  max_depth=5, random_state=42)
model.fit(features_train, target_train)
target_prediction = model.predict(features_test)

# Core regression metrics on the held-out test set.
mse = mean_squared_error(target_test, target_prediction)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)
# Average predicted fuel per nautical mile (switch to 'dist_km' for per-km).
average_predicted_fuel_per_nm = (target_prediction / features_test['dist_nm']).mean()

# F statistic for the report: F = (SSR / df_regression) / (SSE / df_residual),
# computed on the same test set as the metrics above.
# NOTE(fix): the original used the undefined name `fuel` (a NameError), claimed
# one predictor while using five, and mixed full-set n with test-set MSE.
n_test = len(target_test)
regression = features_test.shape[1]           # model degrees of freedom (predictors)
residual = n_test - regression - 1            # residual degrees of freedom
ss_total = np.sum((target_test - np.mean(target_test)) ** 2)
explained_variance = r2 * ss_total            # SSR
unexplained_variance = ss_total - explained_variance  # SSE
F_value = (explained_variance / regression) / (unexplained_variance / residual)
p_value = 1 - f.cdf(F_value, regression, residual)  # scipy.stats.f, imported at top
rse = np.sqrt(mse)                            # residual standard error

# Predict fuel for a hypothetical future flight; values must follow feature_columns
# order. NOTE(fix): passed as a named-column DataFrame (matching the de-leaked
# 4-feature model) instead of a bare 5-value list.
future_distance_nm = [30.90, 40, 1894.34, 23.9]  # ['dist_km', 'cruiseTime', 'dist_nm', 'CO2']
future_flight = pd.DataFrame([future_distance_nm], columns=feature_columns)
predicted_fuel_future = model.predict(future_flight)

print(f"mean squared error: {mse}")  # checking the model performance
print(f"R-squared: {r2}")
print(f"mean absolute error: {mae}")
print(f"average fuel consumption per nautical mile: {average_predicted_fuel_per_nm:.2f} for Gradient Boosting model")
print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"standard error: {rse:.2f}")
print(f"f-statistic: {F_value:.2f}")
print(f"predicted fuel needed for a {future_distance_nm} nm flight: {predicted_fuel_future[0]:.2f} kg")
# MSE scale depends heavily on which features are included; with the leakage
# removed the metrics now reflect genuine predictive skill, so compare against
# the linear-regression baseline from here.