# Spaces:
# Runtime error
# Runtime error
# -*- coding: utf-8 -*-
"""neural_network.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
"""
import numpy as np
import pandas as pd
import requests
from scipy.stats import f
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
# Neural network model.
#
# Trains a small fully connected regressor that predicts 'fuel_burn_total'
# from the flight/aircraft columns of "preprocessed_data.csv" (the newer
# dataset shared by Abdulelah), then prints test-set error metrics and an
# F-statistic / p-value for the report.

# --- load data -------------------------------------------------------------
dataset = pd.read_csv('preprocessed_data.csv')
# Reassign instead of dropping in place so later column selections are not
# views of a mutated frame; reset the index so rows stay aligned with the
# encoded columns built below.
dataset = dataset.dropna().reset_index(drop=True)

# --- features / target -----------------------------------------------------
# 'fuel_burn_total' is the target, so it must NOT also be an input column:
# with it among the features the network can simply copy the answer.
# NOTE(review): 'fuel_burn' may itself be a direct component of the total;
# confirm with the data owner whether it should stay as a predictor.
feature = dataset[['distance', 'model', 'seats', 'fuel_burn']]
target = dataset['fuel_burn_total']

# --- one-hot encode the categorical 'model' column -------------------------
# The `sparse` keyword was removed from OneHotEncoder in scikit-learn 1.4
# (renamed to `sparse_output` in 1.2). Using the default sparse output and
# densifying with .toarray() works on every version.
encoder = OneHotEncoder()
feature_encoded = pd.DataFrame(
    encoder.fit_transform(feature[['model']]).toarray(),
    columns=encoder.get_feature_names_out(['model']),
)
# Non-inplace drop: `feature` is a selection from `dataset`, and an in-place
# drop on it triggers pandas' SettingWithCopyWarning.
feature = pd.concat(
    [feature.drop(columns='model').reset_index(drop=True), feature_encoded],
    axis=1,
)

# --- train / test split and scaling ----------------------------------------
feature_train, feature_test, target_train, target_test = train_test_split(
    feature, target, test_size=0.1, random_state=42
)
# Fit the scaler on the training rows only, then apply it to the test rows.
scaler = StandardScaler()
feature_train_scaled = scaler.fit_transform(feature_train)
feature_test_scaled = scaler.transform(feature_test)

# --- build and train the model ---------------------------------------------
model = Sequential([
    Dense(64, activation='relu', input_shape=(feature_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1),  # single linear output for regression; hidden sizes are tunable
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(feature_train_scaled, target_train, epochs=50, batch_size=32, verbose=1)

# --- evaluate on the held-out test split -----------------------------------
test_loss = model.evaluate(feature_test_scaled, target_test)
print("mean squared e", test_loss)

# model.predict returns shape (n, 1); flatten so it lines up with the target.
target_prediction = model.predict(feature_test_scaled).ravel()
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)
mse = mean_squared_error(target_test, target_prediction)

# --- F-statistic for the report --------------------------------------------
# r2 and mse are computed on the test split, so the sums of squares and the
# observation count must come from the same split (the original referenced an
# undefined name `fuel` here and used the size of the full dataset).
feature_we_want = len(target_test)  # number of observations behind r2 / mse
# NOTE(review): the degrees of freedom below treat the model as having a
# single predictor, as in the original report; the network actually receives
# several input columns - confirm this is the intended convention.
regression = 1
residual = feature_we_want - 2
total_sum_of_squares = np.sum((target_test - np.mean(target_test)) ** 2)
explained_variance = r2 * total_sum_of_squares
unexplained_variance = mse * feature_we_want
F_value = (explained_variance / regression) / (unexplained_variance / residual)
# Survival function == 1 - cdf, but keeps precision for very small p-values.
p_value = f.sf(F_value, regression, residual)
rse = np.sqrt(mse)

# --- report ----------------------------------------------------------------
print(f"Rquared {r2}")
print(f"mean absolute e {mae}")
print(f"mean squared e {mse}")
print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"standard error: {rse:.2f}")
print(f"f-statistic: {F_value:.2f}")

# MSE is the mean squared difference between predicted and actual fuel burn
# totals on the test split; lower is better. An earlier run reported an MSE of
# about 4.97. A later run reported 0, most likely because the target column
# was also fed in as a feature (leakage), not because the model was perfect.