# -*- coding: utf-8 -*-
"""neural_network.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1hn_e3CJx3T9jqeSZjSgcW4Dybf8sD9q9
"""

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scipy.stats import f

# Neural network model
# Note: this uses the new dataset that Abdulelah shared with me,
# saved as "preprocessed_data.csv".

dataset = pd.read_csv('preprocessed_data.csv')   # load the preprocessed dataset
dataset.dropna(inplace=True)                     # drop rows with missing values
dataset.head()

# fuel_burn_total is the target, so it must not also appear in the features (that would leak the answer)
feature = dataset[['distance', 'model', 'seats', 'fuel_burn']].copy()
target = dataset['fuel_burn_total']

# one-hot encode the categorical 'model' column
encoder = OneHotEncoder(sparse_output=False)   # 'sparse' was renamed to 'sparse_output' in recent scikit-learn
feature_encoded = pd.DataFrame(encoder.fit_transform(feature[['model']]))
feature_encoded.columns = encoder.get_feature_names_out(['model'])
feature.drop('model', axis=1, inplace=True)
feature = pd.concat([feature.reset_index(drop=True), feature_encoded.reset_index(drop=True)], axis=1)
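# Optional alternative (a sketch, not part of the original notebook): scikit-learn's
# ColumnTransformer can bundle the one-hot encoding and the scaling done below into one
# preprocessor applied to the raw columns, avoiding the manual drop/concat bookkeeping.
# The column names are the ones selected above; the function is defined here but not called.
from sklearn.compose import ColumnTransformer

def build_preprocessor():
    """Return a ColumnTransformer that one-hot encodes 'model' and scales the numeric columns."""
    return ColumnTransformer([
        ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), ['model']),
        ('scale', StandardScaler(), ['distance', 'seats', 'fuel_burn']),
    ])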


feature_train, feature_test, target_train, target_test = train_test_split(feature, target, test_size=0.1, random_state=42)   # split into train and test sets
scaler = StandardScaler()
feature_train_scaled = scaler.fit_transform(feature_train)   # fit the scaler on the training split only
feature_test_scaled = scaler.transform(feature_test)

# build the model: two hidden layers of 64 units and a single linear output for regression
model = Sequential([
    Dense(64, activation='relu', input_shape=(feature_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1)])  # layer widths and depth can be tuned
model.compile(optimizer='adam', loss='mean_squared_error')   # compile with MSE loss
model.fit(feature_train_scaled, target_train, epochs=50, batch_size=32, verbose=1)   # train the model
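# Optional (a sketch, not part of the original run): holding out a validation split with
# early stopping is usually more robust than a fixed 50 epochs. The patience value below
# is an assumption; uncomment to use it in place of the fit() call above.
# from tensorflow.keras.callbacks import EarlyStopping
# early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# model.fit(feature_train_scaled, target_train, epochs=200, batch_size=32,
#           validation_split=0.2, callbacks=[early_stop], verbose=1)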

test_loss = model.evaluate(feature_test_scaled, target_test)   # evaluate() returns the loss, i.e. the test MSE
print("Test MSE (from evaluate):", test_loss)


target_prediction = model.predict(feature_test_scaled)
r2 = r2_score(target_test, target_prediction)
mae = mean_absolute_error(target_test, target_prediction)
mse = mean_squared_error(target_test, target_prediction)

n_test = len(target_test)               # number of test observations (r2 and mse above are computed on the test set)
p = feature_test_scaled.shape[1]        # number of predictors (after one-hot encoding there is more than one)
df_regression = p
df_residual = n_test - p - 1
explained_variance = r2 * np.sum((target_test - np.mean(target_test))**2)   # SSR = R^2 * SST
unexplained_variance = mse * n_test                                         # SSE = MSE * n

# F = (SSR / df_regression) / (SSE / df_residual); calculated for the report
F_value = (explained_variance / df_regression) / (unexplained_variance / df_residual)
p_value = 1 - f.cdf(F_value, df_regression, df_residual)   # p-value for the report
rse = np.sqrt(mse)   # root mean squared error, used as the standard error of the fit

print(f"Rquared {r2}")
print(f"mean absolute e {mae}")
print(f"mean squared e {mse}")
print(f"regression: {regression:.4f}")
print(f"residual: {residual:.4f}")
print(f"p-value: {p_value:.4f}")   # calculating P value for the report
print(f"standard error: {rse:.2f}")
print(f"f-statistic: {F_value:.2f}")
# The MSE between the predicted and actual fuel burn totals is around 4.97; a lower value would be better.

# Note: a test MSE of exactly 0 would be a red flag for target leakage (e.g. fuel_burn_total
# accidentally left in the feature set), not a sign of a good model.
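# Cross-check (a sketch, assuming statsmodels is available as it is in Colab): an OLS
# baseline reports an F-statistic and p-value directly, which can be compared against
# the manual calculation above. Note this is a linear baseline, not the neural network.
import statsmodels.api as sm

ols_baseline = sm.OLS(np.asarray(target_train), sm.add_constant(feature_train_scaled)).fit()
print(f"OLS baseline F-statistic: {ols_baseline.fvalue:.2f}, "
      f"p-value: {ols_baseline.f_pvalue:.4f}, R squared: {ols_baseline.rsquared:.4f}")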