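# NOTE: the next seven lines appear to be web file-viewer page residue, not part
# of the program; they are kept as comments so the module parses.
# poudel's picture
# Upload 8 files
# 654d76b verified
# raw
# history blame
# No virus
# 3.21 kB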
# -*- coding: utf-8 -*-
"""neural_network.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
"""
import pandas as pd
import requests
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scipy.stats import f
# Neural Network model
# Note here I am using a new dataset which Abdulelah shared with me.
# Data filename: "preprocessed_data.csv"
# Load the preprocessed data and discard rows with missing values.
dataset = pd.read_csv('preprocessed_data.csv')  # using dataset Abdulelah gave me
dataset.dropna(inplace=True)
dataset.head()  # notebook leftover: only displays output when run as a cell

# Predictors. 'fuel_burn_total' is intentionally NOT listed: it is the target,
# and feeding it to the network as an input lets the model simply copy it,
# which makes every evaluation metric meaningless (target leakage).
# NOTE(review): if 'fuel_burn_total' is derived directly from 'fuel_burn',
# that column leaks the target as well -- confirm against the dataset.
# .copy() gives an independent frame so later edits never touch `dataset`
# (avoids pandas' SettingWithCopyWarning).
feature = dataset[['distance', 'model', 'seats', 'fuel_burn']].copy()
# Same 0..n-1 index as the rebuilt feature frame below.
target = dataset['fuel_burn_total'].reset_index(drop=True)

# One-hot encode the categorical 'model' column.
# scikit-learn renamed `sparse` to `sparse_output`; try the new keyword first
# and fall back to the old one on older releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
feature_encoded = pd.DataFrame(
    encoder.fit_transform(feature[['model']]),
    columns=encoder.get_feature_names_out(['model']),
)

# Swap the raw 'model' column for its one-hot columns. The index is reset
# because dropna() may have left gaps, and concat(axis=1) aligns on index.
feature = pd.concat(
    [feature.drop(columns='model').reset_index(drop=True), feature_encoded],
    axis=1,
)

# Hold out 10% of the rows for testing; fixed seed for a repeatable split.
feature_train, feature_test, target_train, target_test = train_test_split(
    feature, target, test_size=0.1, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler()
feature_train_scaled = scaler.fit_transform(feature_train)
feature_test_scaled = scaler.transform(feature_test)
# Network definition: two 64-unit ReLU hidden layers followed by a single
# linear output unit for the regression target.
n_inputs = feature_train_scaled.shape[1]
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(n_inputs,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))  # layer sizes can be changed

# Adam optimizer, mean-squared-error loss.
model.compile(loss='mean_squared_error', optimizer='adam')

# Train for 50 epochs with mini-batches of 32 samples.
model.fit(
    feature_train_scaled,
    target_train,
    batch_size=32,
    epochs=50,
    verbose=1,
)

# Loss on the held-out rows as reported by Keras.
mse = model.evaluate(feature_test_scaled, target_test)
print("mean squared e", mse)

# Predict on the held-out rows and compute the scikit-learn metrics.
# `mse` is recomputed here and replaces the Keras value above.
target_prediction = model.predict(feature_test_scaled)
mse = mean_squared_error(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)
r2 = r2_score(target_test, target_prediction)
# F-statistic and p-value for the report, computed on the held-out test split.
#
# r2 and mse were both measured on the test split, so the observation count
# and the total sum of squares must come from `target_test` as well. The
# original code used len(target) (the whole dataset) and an undefined name
# `fuel`, which raised NameError.
observed = np.asarray(target_test, dtype=float)
feature_we_want = len(observed)  # number of test observations

# NOTE(review): these degrees of freedom follow the single-predictor formula
# (1 and n - 2). The network is trained on several input columns, so confirm
# this is the intended convention for the report.
regression = 1
residual = feature_we_want - 2

total_sum_of_squares = np.sum((observed - observed.mean()) ** 2)
explained_variance = r2 * total_sum_of_squares  # R^2 * SST
unexplained_variance = mse * feature_we_want    # MSE * n = SSE

F_value = (explained_variance / regression) / (unexplained_variance / residual)
# Survival function = 1 - cdf, but keeps precision when the p-value is tiny.
p_value = f.sf(F_value, regression, residual)
rse = np.sqrt(mse)  # square root of the test MSE

print(f"Rquared {r2}")
print(f"mean absolute e {mae}")
print(f"mean squared e {mse}")
print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"standard error: {rse:.2f}")
print(f"f-statistic: {F_value:.2f}")
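# The MSE between the predicted and actual fuel burn totals is around 4.97; if it were lower it would be better.
# Update: the MSE is now ~0. NOTE(review): a near-zero MSE usually signals target leakage
# (e.g. the target column appearing among the input features) rather than a good model -- verify before relying on it.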