Spaces:
Runtime error
Runtime error
Upload 8 files
Browse files
- app.py +16 -0
- best_pipeline.pkl +3 -0
- eda.py +99 -0
- list_cat_nominal_columns.txt +1 -0
- list_cat_ordinal_columns.txt +1 -0
- list_num_columns.txt +1 -0
- prediction.py +121 -0
- requirements.txt +7 -0
app.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
import eda
|
4 |
+
import prediction
|
5 |
+
|
6 |
+
# Sidebar navigation: the selected entry decides which page is rendered on
# this script run.
page = st.sidebar.selectbox("choose page: ", ("Home page","Data exploration","Data Prediction"))

if page == "Home page":
    # The title and objective previously read "Default Payment", left over
    # from a different project; the EDA and prediction pages of this app are
    # about flight passenger satisfaction, so the landing page now says so.
    st.title("Flight Passenger Satisfaction Prediction")
    st.write("Name :Dicky Gabriel")
    st.write("Batch :SBY-002")
    st.write("Objective : Predict Flight Passenger Satisfaction")
elif page == "Data exploration":
    eda.run()
else:
    # Only "Data Prediction" remains.
    prediction.run()
|
best_pipeline.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9acb821bd4b7e6e41b2e2acb705449faadef9229d54078f5ac2a49ecd9fe0b2
|
3 |
+
size 72927667
|
eda.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import seaborn as sns
|
6 |
+
from matplotlib.ticker import MultipleLocator
|
7 |
+
|
8 |
+
def _render(fig):
    """Show *fig* in the Streamlit page, then close it.

    Closing matters because the script is re-executed on every interaction;
    figures that are never closed pile up in pyplot's global registry.
    """
    st.pyplot(fig)
    plt.close(fig)


def _age_histogram(df, title, hue=None):
    """Build the age histogram (optionally split by *hue*) and return its figure."""
    fig, ax = plt.subplots(figsize=(20, 20))
    ax.minorticks_on()
    ax.xaxis.set_minor_locator(MultipleLocator(5))
    ax.yaxis.set_minor_locator(MultipleLocator(100))
    ax.set_title(title, size=20, fontweight='bold', y=1.04)

    sns.histplot(x='age', data=df, edgecolor='black', hue=hue, kde=True,
                 line_kws={'lw': 1, 'linestyle': '--'}, ax=ax)

    ax.set_xlabel('Age', size=15)
    ax.set_ylabel('Count', size=15)
    return fig


def _satisfaction_countplot(df, column):
    """Build a count plot of *column* split by satisfaction and return its figure.

    The axes are passed to seaborn explicitly instead of relying on pyplot's
    implicit "current axes", so the plot always lands on the figure created
    here.
    """
    fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
    sns.countplot(x=column, hue='satisfaction', data=df, ax=ax)
    ax.set_title(column, size=15)
    ax.legend(fontsize='10')
    ax.set_xlabel('')
    ax.set_ylabel('Count', size=15)

    # Write each bar's height just above the bar.
    for bar in ax.patches:
        ax.annotate(f'{bar.get_height()}',
                    (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                    ha='center', va='center', fontsize=8, color='black', xytext=(0, 5),
                    textcoords='offset points')
    return fig


def run():
    """Render the data-exploration page.

    Reads "Fligh_satification.csv" from the working directory, shows the raw
    table, then draws: a satisfaction pie chart, an age histogram, an age
    histogram split by satisfaction, and count plots of "type of travel" and
    "class" split by satisfaction.

    Raises:
        FileNotFoundError: if the CSV file is not present next to the app.
    """
    st.title("Flight Passenger Satisfaction Prediction")

    st.subheader("Analysis Data for Flight Passenger")

    st.write("This page made by Dicky Gabriel")
    st.markdown("---")

    df = pd.read_csv("Fligh_satification.csv")
    st.dataframe(df)
    # The table above is shown with the original headers; everything below
    # addresses columns by their lower-cased names.
    df.columns = df.columns.str.lower()

    st.write("## Passenger Satisfaction")
    sns.set_style("whitegrid")
    fig_pie, ax_pie = plt.subplots(figsize=(6, 6))
    # value_counts() orders by frequency, so the labels are taken from its
    # index rather than hard-coded; a fixed label list would mislabel the
    # slices whenever "satisfied" happens to be the larger group.
    satisfaction_counts = df["satisfaction"].value_counts()
    pie_labels = [str(value).capitalize() for value in satisfaction_counts.index]
    ax_pie.pie(satisfaction_counts, labels=pie_labels, autopct='%1.1f%%')
    ax_pie.set_title('Perbandingan kolom satisfied dan neutral or disastified')
    _render(fig_pie)

    st.write("## Age Histogram")
    _render(_age_histogram(df, 'Ages Histogram'))

    st.write("## Scatterplot of age and satisfaction")
    _render(_age_histogram(df, 'Ages Histogram with Satisfaction', hue="satisfaction"))

    # This section used to be duplicated verbatim: the heading appeared twice
    # and the first figure was created but never rendered or closed.
    st.write("## plane type")
    _render(_satisfaction_countplot(df, 'type of travel'))

    st.write("## Class type")
    # No plt.show() here: Streamlit displays figures through st.pyplot.
    _render(_satisfaction_countplot(df, 'class'))


if __name__ == "__main__":
    run()
|
list_cat_nominal_columns.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["type of travel", "generation"]
|
list_cat_ordinal_columns.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["class", "inflight wifi service", "ease of online booking", "food and drink", "online boarding", "seat comfort", "inflight entertainment", "on-board service", "leg room service", "baggage handling", "checkin service", "inflight service", "cleanliness"]
|
list_num_columns.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["flight distance"]
|
prediction.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pickle
|
3 |
+
import json
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
def _load_column_list(path):
    """Return the JSON value stored in the text file at *path*."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Column-name lists saved alongside the model, one JSON array per file.
list_num_skew_columns = _load_column_list("list_num_columns.txt")
nom_col_skew = _load_column_list("list_cat_nominal_columns.txt")
ord_col_skew = _load_column_list("list_cat_ordinal_columns.txt")

# The fitted pipeline is unpickled once, when this module is imported.
with open("best_pipeline.pkl", "rb") as model_file:
    best_pipeline = pickle.load(model_file)
|
18 |
+
|
19 |
+
# (data_inf column name, widget label) for every 0-5 rating input, listed in
# the order the widgets appear and the columns are emitted.
_RATING_FIELDS = (
    ("inflight wifi service", "inflight_wifi_service"),
    ("departure/arrival time convenient", "departure/arrival_time_convenient"),
    ("ease of online booking", "ease_of_online_booking"),
    ("gate location", "gate_location"),
    ("food and drink", "food_and_drink"),
    ("online boarding", "online_boarding"),
    ("seat comfort", "seat_comfort"),
    ("inflight entertainment", "inflight_entertainment"),
    ("on-board service", "on_board_service"),
    ("leg room service", "leg_room_service"),
    ("baggage handling", "baggage_handling"),
    ("checkin service", "checkin_service"),
    ("inflight service", "inflight_service"),
    ("cleanliness", "cleanliness"),
)


def _generation_for_age(age):
    """Return the generation label for *age*.

    Ages up to 21 map to 'Generation z'. The lower bound used to be 6, which
    sent age 5 (the minimum the form accepts) to the final branch and
    labelled a five-year-old 'Silent Generation'.
    """
    if age <= 21:
        return 'Generation z'
    if age <= 36:
        return 'Millennials'
    if age <= 52:
        return 'Generation X'
    if age <= 73:
        return 'Baby Boomers'
    return 'Silent Generation'


def run():
    """Render the prediction page.

    Shows a form collecting the passenger attributes, displays the collected
    values as a one-row table, derives the "generation" column from the age,
    and, once the form has been submitted, writes the pipeline's prediction
    for that row to the page.
    """
    # create form
    with st.form("form"):
        age = st.number_input("age",
                              min_value=5,
                              max_value=90,
                              value=30,
                              step=2)
        flight_distance = st.number_input("flight distance",
                                          min_value=31,
                                          max_value=5000,
                                          value=850,
                                          step=10)
        departure_delay_in_minutes = st.number_input("departure delay in minutes",
                                                     min_value=0,
                                                     max_value=1600,
                                                     value=200,
                                                     step=10)
        arrival_delay_in_minutes = st.number_input("arrival delay in minutes",
                                                   min_value=0,
                                                   max_value=1600,
                                                   value=200,
                                                   step=10)

        st.markdown("---")
        gender = st.radio("gender",("Male","Female"),index= 0)
        customer_type = st.radio("customer type",("Loyal customer","disloyal customer"),index= 0)
        type_of_travel = st.radio("type of travel",('Personal Travel', 'Business travel'),index= 0)
        class_flight = st.radio("class flight",('Eco Plus', 'Business', 'Eco'),index= 0)

        st.markdown("---")
        # One radio per rating, created in table order so the page layout is
        # the same as when each widget was written out by hand.
        ratings = {
            column: st.radio(label, (0, 1, 2, 3, 4, 5), index=0)
            for column, label in _RATING_FIELDS
        }
        st.markdown("---")

        submitted = st.form_submit_button("predict")

    # Column order: identity fields, the ratings, then distance and delays.
    data_inf = {
        "gender" : gender,
        "customer type" : customer_type,
        "age" : age,
        "type of travel" : type_of_travel,
        "class" : class_flight,
        **ratings,
        "flight distance" : flight_distance,
        "departure delay in minutes" : departure_delay_in_minutes,
        "arrival delay in minutes" : arrival_delay_in_minutes,
    }

    data_inf = pd.DataFrame([data_inf])
    st.dataframe(data_inf)

    # Added after the preview table, so "generation" is not shown there but
    # is present in the frame handed to the pipeline.
    data_inf["generation"] = [_generation_for_age(x) for x in data_inf["age"]]

    if submitted:
        # The whole frame goes to the pipeline; the per-group column subsets
        # that used to be computed here were never used.
        y_predict_inf = best_pipeline.predict(data_inf)

        st.write("# Satisfaction: ", str(y_predict_inf[0]))


if __name__=="__main__":
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
matplotlib
|
5 |
+
seaborn
|
6 |
+
plotly
|
7 |
+
scikit-learn
|