diff --git "a/may-22-binary-classification.ipynb" "b/may-22-binary-classification.ipynb" new file mode 100755--- /dev/null +++ "b/may-22-binary-classification.ipynb" @@ -0,0 +1,3518 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5e4ce795", + "metadata": { + "papermill": { + "duration": 0.033432, + "end_time": "2022-05-11T11:03:04.321959", + "exception": false, + "start_time": "2022-05-11T11:03:04.288527", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Problem statement\n", + "\n", + "The May edition of the 2022 Tabular Playground series is a binary classification problem that includes a number of different feature interactions. This competition is an opportunity to explore various methods for identifying and exploiting these feature interactions." + ] + }, + { + "cell_type": "markdown", + "id": "af70a983", + "metadata": { + "papermill": { + "duration": 0.032628, + "end_time": "2022-05-11T11:03:04.386417", + "exception": false, + "start_time": "2022-05-11T11:03:04.353789", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a8932043", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:04.453540Z", + "iopub.status.busy": "2022-05-11T11:03:04.452968Z", + "iopub.status.idle": "2022-05-11T11:03:05.559143Z", + "shell.execute_reply": "2022-05-11T11:03:05.558125Z" + }, + "papermill": { + "duration": 1.143269, + "end_time": "2022-05-11T11:03:05.561715", + "exception": false, + "start_time": "2022-05-11T11:03:04.418446", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "66d2b4ae", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:05.627534Z", + "iopub.status.busy": 
"2022-05-11T11:03:05.627253Z", + "iopub.status.idle": "2022-05-11T11:03:05.633896Z", + "shell.execute_reply": "2022-05-11T11:03:05.633045Z" + }, + "papermill": { + "duration": 0.043253, + "end_time": "2022-05-11T11:03:05.637363", + "exception": false, + "start_time": "2022-05-11T11:03:05.594110", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/kaggle/input/tabular-playground-series-may-2022/sample_submission.csv\n", + "/kaggle/input/tabular-playground-series-may-2022/train.csv\n", + "/kaggle/input/tabular-playground-series-may-2022/test.csv\n" + ] + } + ], + "source": [ + "import os\n", + "for dirname, _, filenames in os.walk('/kaggle/input'):\n", + " for filename in filenames:\n", + " print(os.path.join(dirname, filename))" + ] + }, + { + "cell_type": "markdown", + "id": "be1e26a5", + "metadata": { + "papermill": { + "duration": 0.035619, + "end_time": "2022-05-11T11:03:05.708148", + "exception": false, + "start_time": "2022-05-11T11:03:05.672529", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Read files" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1c56f757", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:05.776270Z", + "iopub.status.busy": "2022-05-11T11:03:05.775581Z", + "iopub.status.idle": "2022-05-11T11:03:21.346102Z", + "shell.execute_reply": "2022-05-11T11:03:21.345294Z" + }, + "papermill": { + "duration": 15.60683, + "end_time": "2022-05-11T11:03:21.348390", + "exception": false, + "start_time": "2022-05-11T11:03:05.741560", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "train = pd.read_csv(\"/kaggle/input/tabular-playground-series-may-2022/train.csv\")\n", + "test = pd.read_csv(\"/kaggle/input/tabular-playground-series-may-2022/test.csv\")\n", + "submission = pd.read_csv(\"/kaggle/input/tabular-playground-series-may-2022/sample_submission.csv\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": 4, + "id": "7dc3af51", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:21.416502Z", + "iopub.status.busy": "2022-05-11T11:03:21.416126Z", + "iopub.status.idle": "2022-05-11T11:03:21.717153Z", + "shell.execute_reply": "2022-05-11T11:03:21.716495Z" + }, + "papermill": { + "duration": 0.338273, + "end_time": "2022-05-11T11:03:21.719304", + "exception": false, + "start_time": "2022-05-11T11:03:21.381031", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idf_00f_01f_02f_03f_04f_05f_06f_07f_08...f_22f_23f_24f_25f_26f_27f_28f_29f_30target
00-1.3732460.238887-0.2433760.567405-0.6477150.8393260.11313315...-2.5407390.766952-2.730628-0.2081771.363402ABABDADBAB67.609153000
111.697021-1.710322-2.230332-0.5456611.113173-1.5521750.44782513...2.278315-0.633658-1.217077-3.782194-0.058316ACACCADCEB377.096415001
221.6817260.616746-1.0276890.810492-0.6090860.113965-0.70866010...-1.385775-0.520558-0.0091212.788536-3.703488AAAEABCKAD-195.599702021
33-0.118172-0.587835-0.8046382.0868220.371005-0.128831-0.28257532...0.572594-1.6532131.686035-2.533098-0.608601BDBBAACBCB210.826205001
441.148481-0.176567-0.664871-1.1013430.4678750.5001170.40751533...-3.912929-1.4303662.127649-3.3067844.371371BDBCBBCHFE-217.211798011
..................................................................
8999958999951.380145-0.0388840.5971110.8545600.684301-1.0586181.31069921...-1.5947440.5220190.8330472.7141251.290094BABBCBBBED455.033851021
899996899996-1.3697890.0448410.0154580.376565-0.380529-0.830815-1.79845841...2.413899-0.674942-0.412111-0.030436-3.144047BBBGBBDQBE134.703577010
8999978999971.386201-0.9611500.725994-0.1328440.873911-0.245339-1.04578600...-0.151930-4.560773-1.2491541.7935352.253696AEBEDBBHBA-99.536313010
899998899998-1.590572-0.509938-1.715397-0.2499881.3599331.650808-0.05859202...2.4236702.1100080.561271-2.1496101.019982ADBAAADDAE47.823039120
899999899999-0.636210-0.425986-1.826699-0.5987971.589577-0.482298-0.21409371...1.3406963.7623511.797137-0.4128372.090440BCAACADSCE-44.559296021
\n", + "

900000 rows × 33 columns

\n", + "
" + ], + "text/plain": [ + " id f_00 f_01 f_02 f_03 f_04 f_05 \\\n", + "0 0 -1.373246 0.238887 -0.243376 0.567405 -0.647715 0.839326 \n", + "1 1 1.697021 -1.710322 -2.230332 -0.545661 1.113173 -1.552175 \n", + "2 2 1.681726 0.616746 -1.027689 0.810492 -0.609086 0.113965 \n", + "3 3 -0.118172 -0.587835 -0.804638 2.086822 0.371005 -0.128831 \n", + "4 4 1.148481 -0.176567 -0.664871 -1.101343 0.467875 0.500117 \n", + "... ... ... ... ... ... ... ... \n", + "899995 899995 1.380145 -0.038884 0.597111 0.854560 0.684301 -1.058618 \n", + "899996 899996 -1.369789 0.044841 0.015458 0.376565 -0.380529 -0.830815 \n", + "899997 899997 1.386201 -0.961150 0.725994 -0.132844 0.873911 -0.245339 \n", + "899998 899998 -1.590572 -0.509938 -1.715397 -0.249988 1.359933 1.650808 \n", + "899999 899999 -0.636210 -0.425986 -1.826699 -0.598797 1.589577 -0.482298 \n", + "\n", + " f_06 f_07 f_08 ... f_22 f_23 f_24 f_25 \\\n", + "0 0.113133 1 5 ... -2.540739 0.766952 -2.730628 -0.208177 \n", + "1 0.447825 1 3 ... 2.278315 -0.633658 -1.217077 -3.782194 \n", + "2 -0.708660 1 0 ... -1.385775 -0.520558 -0.009121 2.788536 \n", + "3 -0.282575 3 2 ... 0.572594 -1.653213 1.686035 -2.533098 \n", + "4 0.407515 3 3 ... -3.912929 -1.430366 2.127649 -3.306784 \n", + "... ... ... ... ... ... ... ... ... \n", + "899995 1.310699 2 1 ... -1.594744 0.522019 0.833047 2.714125 \n", + "899996 -1.798458 4 1 ... 2.413899 -0.674942 -0.412111 -0.030436 \n", + "899997 -1.045786 0 0 ... -0.151930 -4.560773 -1.249154 1.793535 \n", + "899998 -0.058592 0 2 ... 2.423670 2.110008 0.561271 -2.149610 \n", + "899999 -0.214093 7 1 ... 1.340696 3.762351 1.797137 -0.412837 \n", + "\n", + " f_26 f_27 f_28 f_29 f_30 target \n", + "0 1.363402 ABABDADBAB 67.609153 0 0 0 \n", + "1 -0.058316 ACACCADCEB 377.096415 0 0 1 \n", + "2 -3.703488 AAAEABCKAD -195.599702 0 2 1 \n", + "3 -0.608601 BDBBAACBCB 210.826205 0 0 1 \n", + "4 4.371371 BDBCBBCHFE -217.211798 0 1 1 \n", + "... ... ... ... ... ... ... 
\n", + "899995 1.290094 BABBCBBBED 455.033851 0 2 1 \n", + "899996 -3.144047 BBBGBBDQBE 134.703577 0 1 0 \n", + "899997 2.253696 AEBEDBBHBA -99.536313 0 1 0 \n", + "899998 1.019982 ADBAAADDAE 47.823039 1 2 0 \n", + "899999 2.090440 BCAACADSCE -44.559296 0 2 1 \n", + "\n", + "[900000 rows x 33 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "56d9b5e5", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:21.789492Z", + "iopub.status.busy": "2022-05-11T11:03:21.788702Z", + "iopub.status.idle": "2022-05-11T11:03:22.024768Z", + "shell.execute_reply": "2022-05-11T11:03:22.023935Z" + }, + "papermill": { + "duration": 0.272034, + "end_time": "2022-05-11T11:03:22.026588", + "exception": false, + "start_time": "2022-05-11T11:03:21.754554", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idf_00f_01f_02f_03f_04f_05f_06f_07f_08...f_21f_22f_23f_24f_25f_26f_27f_28f_29f_30
09000000.4425170.174380-0.9998160.7627410.186778-1.0747750.50188866...-1.006400-1.193879-2.435736-2.427430-1.9668875.734205BAAABADLAC99.47841900
1900001-0.605598-0.3057150.627667-0.578898-1.7509311.355550-0.19091113...2.3824050.1494421.883322-2.848714-0.7251553.194219AFABBAEGCB-65.99382510
29000020.3039902.4451100.2465150.8182480.359731-1.3318451.35862233...-7.0260981.312277-5.1571921.7140050.5850320.066898BBACABBKEE-87.40562201
39000030.1540530.260126-1.367092-0.093175-1.111034-0.9484811.11922000...-0.594532-3.9394751.754570-2.364007-1.0033203.893099AEBEAACQCC-281.29346000
4900004-1.651904-0.424266-0.667356-0.322124-0.0894620.1817051.78498322...0.084906-0.985736-0.130467-3.5578931.2106871.861884AEBBBBDABF25.62941502
..................................................................
69999515999950.6401100.897808-0.5239561.563760-0.092281-0.6108670.53542601...2.6040481.1228670.5181101.2438370.5751110.076372BCBCEBHMCD204.18653900
6999961599996-0.191771-0.035246-0.1185330.5847502.1269770.568659-0.05266343...3.0298571.384682-1.1357402.982713-1.5117602.225218BAABCADQFC-97.69459102
6999971599997-0.331704-0.328845-1.1855031.022128-0.483099-0.107146-0.96828111...4.021273-1.8452661.096011-2.734508-4.885955-2.248739AAAJCBGQBA130.62274510
6999981599998-2.031073-1.2383980.964699-1.0459500.9060640.634301-0.70747451...1.453864-1.6966061.0189951.973697-0.353068-3.333449BCBBCABNDE-364.62514800
6999991599999-0.085906-0.0021242.2273750.2171453.179153-1.6601880.89198903...-3.549082-4.325318-5.0172210.251268-3.236026-0.362070AFBEBACHFF-155.41734201
\n", + "

700000 rows × 32 columns

\n", + "
" + ], + "text/plain": [ + " id f_00 f_01 f_02 f_03 f_04 f_05 \\\n", + "0 900000 0.442517 0.174380 -0.999816 0.762741 0.186778 -1.074775 \n", + "1 900001 -0.605598 -0.305715 0.627667 -0.578898 -1.750931 1.355550 \n", + "2 900002 0.303990 2.445110 0.246515 0.818248 0.359731 -1.331845 \n", + "3 900003 0.154053 0.260126 -1.367092 -0.093175 -1.111034 -0.948481 \n", + "4 900004 -1.651904 -0.424266 -0.667356 -0.322124 -0.089462 0.181705 \n", + "... ... ... ... ... ... ... ... \n", + "699995 1599995 0.640110 0.897808 -0.523956 1.563760 -0.092281 -0.610867 \n", + "699996 1599996 -0.191771 -0.035246 -0.118533 0.584750 2.126977 0.568659 \n", + "699997 1599997 -0.331704 -0.328845 -1.185503 1.022128 -0.483099 -0.107146 \n", + "699998 1599998 -2.031073 -1.238398 0.964699 -1.045950 0.906064 0.634301 \n", + "699999 1599999 -0.085906 -0.002124 2.227375 0.217145 3.179153 -1.660188 \n", + "\n", + " f_06 f_07 f_08 ... f_21 f_22 f_23 f_24 \\\n", + "0 0.501888 6 6 ... -1.006400 -1.193879 -2.435736 -2.427430 \n", + "1 -0.190911 1 3 ... 2.382405 0.149442 1.883322 -2.848714 \n", + "2 1.358622 3 3 ... -7.026098 1.312277 -5.157192 1.714005 \n", + "3 1.119220 0 0 ... -0.594532 -3.939475 1.754570 -2.364007 \n", + "4 1.784983 2 2 ... 0.084906 -0.985736 -0.130467 -3.557893 \n", + "... ... ... ... ... ... ... ... ... \n", + "699995 0.535426 0 1 ... 2.604048 1.122867 0.518110 1.243837 \n", + "699996 -0.052663 4 3 ... 3.029857 1.384682 -1.135740 2.982713 \n", + "699997 -0.968281 1 1 ... 4.021273 -1.845266 1.096011 -2.734508 \n", + "699998 -0.707474 5 1 ... 1.453864 -1.696606 1.018995 1.973697 \n", + "699999 0.891989 0 3 ... -3.549082 -4.325318 -5.017221 0.251268 \n", + "\n", + " f_25 f_26 f_27 f_28 f_29 f_30 \n", + "0 -1.966887 5.734205 BAAABADLAC 99.478419 0 0 \n", + "1 -0.725155 3.194219 AFABBAEGCB -65.993825 1 0 \n", + "2 0.585032 0.066898 BBACABBKEE -87.405622 0 1 \n", + "3 -1.003320 3.893099 AEBEAACQCC -281.293460 0 0 \n", + "4 1.210687 1.861884 AEBBBBDABF 25.629415 0 2 \n", + "... ... ... 
... ... ... ... \n", + "699995 0.575111 0.076372 BCBCEBHMCD 204.186539 0 0 \n", + "699996 -1.511760 2.225218 BAABCADQFC -97.694591 0 2 \n", + "699997 -4.885955 -2.248739 AAAJCBGQBA 130.622745 1 0 \n", + "699998 -0.353068 -3.333449 BCBBCABNDE -364.625148 0 0 \n", + "699999 -3.236026 -0.362070 AFBEBACHFF -155.417342 0 1 \n", + "\n", + "[700000 rows x 32 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d6f2b103", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:22.097149Z", + "iopub.status.busy": "2022-05-11T11:03:22.096385Z", + "iopub.status.idle": "2022-05-11T11:03:22.108954Z", + "shell.execute_reply": "2022-05-11T11:03:22.107976Z" + }, + "papermill": { + "duration": 0.049962, + "end_time": "2022-05-11T11:03:22.110994", + "exception": false, + "start_time": "2022-05-11T11:03:22.061032", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtarget
09000000.5
19000010.5
29000020.5
39000030.5
49000040.5
.........
69999515999950.5
69999615999960.5
69999715999970.5
69999815999980.5
69999915999990.5
\n", + "

700000 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " id target\n", + "0 900000 0.5\n", + "1 900001 0.5\n", + "2 900002 0.5\n", + "3 900003 0.5\n", + "4 900004 0.5\n", + "... ... ...\n", + "699995 1599995 0.5\n", + "699996 1599996 0.5\n", + "699997 1599997 0.5\n", + "699998 1599998 0.5\n", + "699999 1599999 0.5\n", + "\n", + "[700000 rows x 2 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "submission" + ] + }, + { + "cell_type": "markdown", + "id": "52d8140f", + "metadata": { + "papermill": { + "duration": 0.035034, + "end_time": "2022-05-11T11:03:22.180689", + "exception": false, + "start_time": "2022-05-11T11:03:22.145655", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Analyse" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "68db965c", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:22.251536Z", + "iopub.status.busy": "2022-05-11T11:03:22.251122Z", + "iopub.status.idle": "2022-05-11T11:03:22.429558Z", + "shell.execute_reply": "2022-05-11T11:03:22.428055Z" + }, + "papermill": { + "duration": 0.216562, + "end_time": "2022-05-11T11:03:22.431705", + "exception": false, + "start_time": "2022-05-11T11:03:22.215143", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 900000 entries, 0 to 899999\n", + "Data columns (total 33 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 900000 non-null int64 \n", + " 1 f_00 900000 non-null float64\n", + " 2 f_01 900000 non-null float64\n", + " 3 f_02 900000 non-null float64\n", + " 4 f_03 900000 non-null float64\n", + " 5 f_04 900000 non-null float64\n", + " 6 f_05 900000 non-null float64\n", + " 7 f_06 900000 non-null float64\n", + " 8 f_07 900000 non-null int64 \n", + " 9 f_08 900000 non-null int64 \n", + " 10 f_09 900000 non-null int64 \n", + " 11 f_10 900000 non-null 
int64 \n", + " 12 f_11 900000 non-null int64 \n", + " 13 f_12 900000 non-null int64 \n", + " 14 f_13 900000 non-null int64 \n", + " 15 f_14 900000 non-null int64 \n", + " 16 f_15 900000 non-null int64 \n", + " 17 f_16 900000 non-null int64 \n", + " 18 f_17 900000 non-null int64 \n", + " 19 f_18 900000 non-null int64 \n", + " 20 f_19 900000 non-null float64\n", + " 21 f_20 900000 non-null float64\n", + " 22 f_21 900000 non-null float64\n", + " 23 f_22 900000 non-null float64\n", + " 24 f_23 900000 non-null float64\n", + " 25 f_24 900000 non-null float64\n", + " 26 f_25 900000 non-null float64\n", + " 27 f_26 900000 non-null float64\n", + " 28 f_27 900000 non-null object \n", + " 29 f_28 900000 non-null float64\n", + " 30 f_29 900000 non-null int64 \n", + " 31 f_30 900000 non-null int64 \n", + " 32 target 900000 non-null int64 \n", + "dtypes: float64(16), int64(16), object(1)\n", + "memory usage: 226.6+ MB\n" + ] + } + ], + "source": [ + "train.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ac090171", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:22.505768Z", + "iopub.status.busy": "2022-05-11T11:03:22.505490Z", + "iopub.status.idle": "2022-05-11T11:03:23.698698Z", + "shell.execute_reply": "2022-05-11T11:03:23.697778Z" + }, + "papermill": { + "duration": 1.231653, + "end_time": "2022-05-11T11:03:23.700854", + "exception": false, + "start_time": "2022-05-11T11:03:22.469201", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
idf_00f_01f_02f_03f_04f_05f_06f_07f_08...f_21f_22f_23f_24f_25f_26f_28f_29f_30target
count900000.000000900000.000000900000.000000900000.000000900000.000000900000.000000900000.000000900000.000000900000.000000900000.000000...900000.000000900000.000000900000.000000900000.000000900000.000000900000.000000900000.000000900000.000000900000.000000900000.000000
mean449999.500000-0.0002860.0011650.001174-0.001368-0.0005710.000284-0.0007092.0314602.057998...-0.156307-0.009273-0.369459-0.3427380.1765490.357591-0.3808760.3456611.0026540.486488
std259807.7654730.9988880.9991931.0005141.0001751.0001670.9998750.9999421.6561721.590955...2.4847062.4507972.4534052.3869412.4169592.476020238.7730540.4755840.8189890.499818
min0.000000-4.599856-4.682199-4.642676-4.658816-4.748501-4.750214-4.8429190.0000000.000000...-13.310146-11.853530-12.301097-11.416189-11.918306-14.300577-1229.7530520.0000000.0000000.000000
25%224999.750000-0.675490-0.675162-0.674369-0.676114-0.675909-0.673437-0.6748761.0000001.000000...-1.820063-1.645585-2.019739-1.955956-1.440424-1.261598-159.4274180.0000000.0000000.000000
50%449999.5000000.0011440.0020140.002218-0.002227-0.001662-0.000438-0.0014922.0000002.000000...-0.1526680.030850-0.390966-0.3407460.1609120.404212-0.5198080.0000001.0000000.000000
75%674999.2500000.6743370.6750210.6775050.6725440.6737890.6750280.6747493.0000003.000000...1.5070711.6616761.2554081.2666731.7959282.028219158.9873571.0000002.0000001.000000
max899999.0000004.7493014.8156994.9619824.4549204.9489834.9718814.82266815.00000016.000000...14.45542611.34408012.24710012.38984412.52917912.9130411229.5625771.0000002.0000001.000000
\n", + "

8 rows × 32 columns

\n", + "
" + ], + "text/plain": [ + " id f_00 f_01 f_02 \\\n", + "count 900000.000000 900000.000000 900000.000000 900000.000000 \n", + "mean 449999.500000 -0.000286 0.001165 0.001174 \n", + "std 259807.765473 0.998888 0.999193 1.000514 \n", + "min 0.000000 -4.599856 -4.682199 -4.642676 \n", + "25% 224999.750000 -0.675490 -0.675162 -0.674369 \n", + "50% 449999.500000 0.001144 0.002014 0.002218 \n", + "75% 674999.250000 0.674337 0.675021 0.677505 \n", + "max 899999.000000 4.749301 4.815699 4.961982 \n", + "\n", + " f_03 f_04 f_05 f_06 \\\n", + "count 900000.000000 900000.000000 900000.000000 900000.000000 \n", + "mean -0.001368 -0.000571 0.000284 -0.000709 \n", + "std 1.000175 1.000167 0.999875 0.999942 \n", + "min -4.658816 -4.748501 -4.750214 -4.842919 \n", + "25% -0.676114 -0.675909 -0.673437 -0.674876 \n", + "50% -0.002227 -0.001662 -0.000438 -0.001492 \n", + "75% 0.672544 0.673789 0.675028 0.674749 \n", + "max 4.454920 4.948983 4.971881 4.822668 \n", + "\n", + " f_07 f_08 ... f_21 f_22 \\\n", + "count 900000.000000 900000.000000 ... 900000.000000 900000.000000 \n", + "mean 2.031460 2.057998 ... -0.156307 -0.009273 \n", + "std 1.656172 1.590955 ... 2.484706 2.450797 \n", + "min 0.000000 0.000000 ... -13.310146 -11.853530 \n", + "25% 1.000000 1.000000 ... -1.820063 -1.645585 \n", + "50% 2.000000 2.000000 ... -0.152668 0.030850 \n", + "75% 3.000000 3.000000 ... 1.507071 1.661676 \n", + "max 15.000000 16.000000 ... 
14.455426 11.344080 \n", + "\n", + " f_23 f_24 f_25 f_26 \\\n", + "count 900000.000000 900000.000000 900000.000000 900000.000000 \n", + "mean -0.369459 -0.342738 0.176549 0.357591 \n", + "std 2.453405 2.386941 2.416959 2.476020 \n", + "min -12.301097 -11.416189 -11.918306 -14.300577 \n", + "25% -2.019739 -1.955956 -1.440424 -1.261598 \n", + "50% -0.390966 -0.340746 0.160912 0.404212 \n", + "75% 1.255408 1.266673 1.795928 2.028219 \n", + "max 12.247100 12.389844 12.529179 12.913041 \n", + "\n", + " f_28 f_29 f_30 target \n", + "count 900000.000000 900000.000000 900000.000000 900000.000000 \n", + "mean -0.380876 0.345661 1.002654 0.486488 \n", + "std 238.773054 0.475584 0.818989 0.499818 \n", + "min -1229.753052 0.000000 0.000000 0.000000 \n", + "25% -159.427418 0.000000 0.000000 0.000000 \n", + "50% -0.519808 0.000000 1.000000 0.000000 \n", + "75% 158.987357 1.000000 2.000000 1.000000 \n", + "max 1229.562577 1.000000 2.000000 1.000000 \n", + "\n", + "[8 rows x 32 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "56e154be", + "metadata": { + "papermill": { + "duration": 0.035465, + "end_time": "2022-05-11T11:03:23.773992", + "exception": false, + "start_time": "2022-05-11T11:03:23.738527", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Analyse target" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8ff4e8b1", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:23.846653Z", + "iopub.status.busy": "2022-05-11T11:03:23.846387Z", + "iopub.status.idle": "2022-05-11T11:03:26.891259Z", + "shell.execute_reply": "2022-05-11T11:03:26.890221Z" + }, + "papermill": { + "duration": 3.083778, + "end_time": "2022-05-11T11:03:26.893405", + "exception": false, + "start_time": "2022-05-11T11:03:23.809627", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAc+ElEQVR4nO3deXQc5Znv8e+jzZuMt5Z3G2+SwTZgG4GJbWzCvi8DyUDClstgJgm5IWQyh2Fy7jAzN7mZmYSceycbZsJAGAYYIGAPOwZjY4MB2Rhb3mWDd2vxvspanvtHt4gRWqolVXeL+n3O0VF3VXW/T7mtX1e9VfWWuTsiIhIdWekuQEREUkvBLyISMQp+EZGIUfCLiESMgl9EJGJy0l1AELFYzEeMGJHuMkREOpWlS5dWuXtB4+mdIvhHjBhBSUlJussQEelUzGxzU9PV1SMiEjEKfhGRiFHwi4hEjIJfRCRiFPwiIhGj4BcRiRgFv4hIxIQW/GY2zMzmm9lqM1tlZt9PTH/AzLab2fLEz+Vh1SAiIl8U5gVctcAP3X2ZmfUElprZG4l5v3T3n4fYtoiINCO04Hf3ncDOxOODZrYGGBJWe+n0n+9v+cK0b0wZnoZKRERal5I+fjMbAUwC3k9MutvMVpjZI2bWp5nXzDKzEjMrqaysTEWZIiKREHrwm1k+8Bxwj7sfAH4LjAYmEt8j+EVTr3P32e5e7O7FBQVfGGNIRETaKNTgN7Nc4qH/hLv/EcDdy929zt3rgYeBs8OsQUREPi/Ms3oM+D2wxt0fPGH6oBMWuw4oDasGERH5ojDP6pkG3AKsNLPliWn3AzeZ2UTAgU+Bu0KsQUREGgnzrJ5FgDUx6+Ww2hQRkdbpyl0RkYhR8IuIRIyCX0QkYhT8IiIR0yluti4ikm5fpqFZtMUvIhIxCn4RkYhR8IuIRIyCX0QkYhT8IiIRo+AXEYkYBb+ISMQo+EVEIkbBLyISMQp+EZGIUfCLiESMgl9EJGIU/CIiEaPgFxGJGAW/iEjEKPhFRCJGwS8iEjEKfhGRiFHwi4hEjIJfRCRiFPwiIhGj4BcRiRgFv4hIxCj4RUQiRsEvIhIxCn4RkYhR8IuIRExowW9mw8xsvpmtNrNVZvb9xPS+ZvaGmW1I/O4TVg0iIvJFYW7x1wI/dPdxwDnAd81sHHAf8Ka7FwJvJp6LiEiKhBb87r7T3ZclHh8E1gBDgGuAxxKLPQZcG1YNIiLyRSnp4zezEcAk4H1ggLvvTMzaBQxo5jWzzKzEzEoqKytTUaaISCSEHvxmlg88B9zj7gdOnOfuDnhTr3P32e5e7O7FBQUFYZcpIhIZoQa/meUSD/0n3P2PicnlZjYoMX8QUBFmDSIi8nlhntVjwO+BNe7+4Amz5gK3JR7fBswJqwYREfminBDfex
pwC7DSzJYnpt0P/Az4LzO7A9gMfD3EGkREpJHQgt/dFwHWzOwLwmpXRERapit3RUQiRsEvIhIxCn4RkYhR8IuIRIyCX0QkYhT8IiIRo+AXEYkYBb+ISMQo+EVEIkbBLyISMQp+EZGIUfCLiESMgl9EJGIU/CIiEaPgFxGJGAW/iEjEKPhFRCJGwS8iEjEKfhGRiFHwi4hEjIJfRCRiFPwiIhGj4BcRiRgFv4hIxCj4RUQiRsEvIhIxCn4RkYhR8IuIRIyCX0QkYhT8IiIRo+AXEYkYBb+ISMSEFvxm9oiZVZhZ6QnTHjCz7Wa2PPFzeVjti4hI08Lc4n8UuLSJ6b9094mJn5dDbF9ERJoQKPjN7I9mdoWZBf6icPeFwJ42VyYiIqEIGuS/Ab4BbDCzn5nZ2Ha0ebeZrUh0BfVpbiEzm2VmJWZWUllZ2Y7mRETkRIGC393nufs3gcnAp8A8M3vXzL5lZrlJtPdbYDQwEdgJ/KKFNme7e7G7FxcUFCTRhIiItCRw142Z9QNuB/4C+Aj4v8S/CN4I+h7uXu7ude5eDzwMnJ1UtSIi0m45QRYys+eBscDjwFXuvjMx62kzKwnamJkNOuG11wGlLS0vIiIdL1DwAw83PgPHzLq4e7W7Fzf1AjN7EjgPiJnZNuDvgPPMbCLgxLuM7mpj3SIi0kZBg/9/A41PvXyPeFdPk9z9piYm/z5geyIiEpIWg9/MBgJDgG5mNgmwxKyTgO4h1yYiIiFobYv/EuIHdIcCD54w/SBwf0g1iYhIiFoMfnd/DHjMzK539+dSVJOIiISota6em939P4ARZnZv4/nu/mATLxMRkQzWWldPj8Tv/LALERGR1Gitq+ehxO+/T005IiIStqCDtP2zmZ1kZrlm9qaZVZrZzWEXJyIiHS/okA0Xu/sB4EriF16NAX4UVlEiIhKeoMHf0CV0BfCMu+8PqR4REQlZ0Ct3XzSztcBR4NtmVgAcC68sEREJS9Bhme8DpgLF7l4DHAauCbMwEREJR9AtfoBTiJ/Pf+Jr/tDB9YiISMiCDsv8OPEbqCwH6hKTHQW/iEinE3SLvxgY5+4eZjEiIhK+oGf1lAIDwyxERERSI+gWfwxYbWYfANUNE9396lCqEhGR0AQN/gfCLEJERFInUPC7+wIzOxkodPd5ZtYdyA63NBERCUPQsXruBJ4FHkpMGgK8EFJNIiISoqAHd78LTAMOALj7BqB/WEWJiEh4ggZ/tbsfb3iSuIhLp3aKiHRCQYN/gZndT/ym6xcBzwD/HV5ZIiISlqDBfx9QCawE7gJeBn4cVlEiIhKeoGf11JvZC8AL7l4ZbkkiIhKmFrf4Le4BM6sC1gHrEnff+l+pKU9ERDpaa109PyB+Ns9Z7t7X3fsCU4BpZvaD0KsTEZEO11rw3wLc5O6fNExw903AzcCtYRYmIiLhaC34c929qvHERD9/bjgliYhImFoL/uNtnCciIhmqtbN6zjCzA01MN6BrCPWIiEjIWgx+d9dAbCIiXzJBL+ASEZEvidCC38weMbMKMys9YVpfM3vDzDYkfvcJq30REWlamFv8jwKXNpp2H/CmuxcCbyaei4hICoUW/O6+ENjTaPI1wGOJx48B14bVvoiINC3VffwD3H1n4vEuYEBzC5rZLDMrMbOSykoNDyQi0lHSdnDX3Z0WxvR399nuXuzuxQUFBSmsTETkyy3VwV9uZoMAEr8rUty+iEjkpTr45wK3JR7fBsxJcfsiIpEX5umcTwLvAWPNbJuZ3QH8DLjIzDYAFyaei4hICgW6EUtbuPtNzcy6IKw2RUSkdbpyV0QkYhT8IiIRo+AXEYkYBb+ISMQo+EVEIkbBLyISMQp+EZGIUfCLiESMgl9EJGIU/CIiEaPgFxGJGAW/iEjEKPhFRCJGwS8iEjEKfhGRiFHwi4hEjIJfRCRiFPwiIhGj4BcRiRgFv4hIxCj4RUQiRsEvIhIxCn4RkYhR8LeDu7Oh/CDHa+vTXYqISG
A56S6gszp4rIYfv1DKnOU76JabzXljCzi3sCDdZYmItEpb/G30/aeW8+KKndw1cxRD+3TjldJdrC8/mO6yRERapeBvg8VlVby1toK/vmQsf3PZqdx8zsnE8rswZ/l2dfuIREBtXT0fb93HKyt34u7pLidpCv4k1dc7P315DUN6d+O2qSMAyM3O4tpJg9l7pIb3Nlalt0ARCVXlwWp+/vo6ni7ZyrefWMb/ePRD9h+tSXdZSVHwJ2lRWRWrdhzg3ouK6Jqb/dn0UbF8RsZ68OHmvdR3wi0AEWldvTvPLt1KTZ1z+9QRPHDVOBZuqOKfXl2b7tKSouBP0h+XbeOkrjlcecagL8w7a0Qf9hw+zidVh9NQmYiEbXFZFVv3HuXqMwZTNKAnt08bya1fOZknP9jCqh37011eYAr+JByuruW1VeVccfpguuRkf2H++MG96JqbRcmne9JQnYiEqaaungXrKykakM/pQ3t9Nv2eC4ro0z2Pn7y0Jo3VJUfBn4RXS3dxtKaOP5s8pMn5udlZTBzWm1U7DnC4ujbF1YlImEq37+fI8TqmjynAzD6b3qt7LrNmjOLdjbspqziUxgqDS0vwm9mnZrbSzJabWUk6amiLuR/vYFjfbhSf3KfZZSYM7kVtvfPOhsoUViYiYVuyaTex/C6MLujxhXnXTx5KdpbxTMnWNFSWvHRu8X/V3Se6e3EaawjsyPFa3tu0m4vHDfzct31jJ/frQbfcbN5YXZHC6kQkTKt3HGDr3qNMGdm3yb//gp5dOP+U/jy3bDs1dZl/Sre6egJaXLab47X1nH9K/xaXy84yxg7syfx1FdTV6+wekS+DF1fsIMtg0rDezS7z58XDqDpUzdvrMn9vP13B78DrZrbUzGalqYakvLW2gvwuOZw1om+ry54ysCd7Dh/noy17U1CZiITJ3Xm1dBcjYz3o3qX5UW5mji2gV7dcXindmcLq2iZdwT/d3ScDlwHfNbMZjRcws1lmVmJmJZWV6f0GdXfmr61g+pgYeTmt/5MVDehJbrYxb426e0Q6u7KKQ2yqOsz4wb1aXC43O4vzT+nPW2srqM3w7p60BL+7b0/8rgCeB85uYpnZ7l7s7sUFBekd/GzNzoPsOnCs1W6eBl1zs5k0vI8O8Ip8CbxSugszGDf4pFaXvWjcAPYdqaFkc2bv7ac8+M2sh5n1bHgMXAyUprqOZCwqiwf4jKLgX0AzCmOs2nGA3YeqwypLRFLg9dW7mDSsNyd1zW112RlFBeRlZ/H6qvIUVNZ26djiHwAsMrOPgQ+Al9z91TTUEdiist2M6Z/PwF5dA7+mYYjmRWUau0eks6o4eIzS7Qe44NQBgZbP75LD1DH9eGPNrowevC3lwe/um9z9jMTPeHf/SaprSEZ1bR0ffLKb6WNiSb1uwpBe9OqWyzsbFPwindU76+N/vzOT2Nu/4JT+bN1zNKOHbtHpnK1Ytnkfx2rqmZZk8GdnGdPHxFi0oSqjv/lFpHkLN1QSy89j3KDW+/cbzCyKHwtcsD5zj/Ep+FuxuKyKLIMpo1o/jbOxcwtj7DpwrNNcxi0if1JX7yxcX8mMwgKyspq/aLOx4f26MzLWQ8HfmS0qq+L0ocEO7DQ2vTC+l7BQ3T0inU7p9v3sPVLDzLHJn1U4s6iAJZt2c6ymLoTK2k/B34L9R2tYsW0f5xYm183TYGif7oyK9WCRTusU6XQWrK/EjKSP70E8+I/V1PNhho7Uq+BvwZJNu6n3tn3wDc4tjLFk0x6qazPzm19EmrZwfSWnDelFv/wuSb92yqi+5GVnsSBDh29Q8LdgcVkV3RIXY7XV9MICjtbUsTTDL+gQkT/Zf6SGZVv2JnU2z4m65+Vw9si+GdvPr+BvwaKyqvg3d4BhGppzzqi+5GSZTusU6UQWb6yi3pM7jbOxmUUFbKg4xI59Rzuwso6h4G/Gjn1H2VR5uF3dPAA9u+YyeXgfFin4RTqNBesq6dk1h4ktjMbZmoaDwgszcK
tfwd+MxYkrbpM9f78p0wtjlO7Yr+EbRDoBd2fB+kqmj4mRk932iCzsn8+gXl0zsrtHwd+MRWVVxPLzGDugZ7vf69zCGO6weOPuDqhMRMK0oeIQuw4ca1c3D4CZMbOogEUbqjLu5iwK/ia4O4vLqpg6OpbUhRvNiV8HkKPTOkU6gYYzcZIZlLE5M4oKOFhdy/Kt+9r9Xh1Jwd+EdeUHqTp0vN39+w2ys4xpY2K8o+EbRDLegvWVFPbPZ3Dvbu1+r2ljYmRnWcad1qngb0LDgdhpbbxwqynnFhawc/8xNlZq+AaRTHXkeC0ffLKn3d08DXp1y2XSsN4szLC9fQV/ExaXVTEq1oMhHfCN36Dh6t+F63V2j0imWrJpN8fr6ts0TENzZhYVsGLbfqoy6OQOBX8jR4/X8e7G3W0epqE5w/p2Z0S/7hqfXySDzVtTQfe87ED31g6q4Uskk+7Ip+BvZHFZFdW19Vw4LtiNF5JxbmEB723creEbRDJQfb0zb3U5M4sK6Jqb3WHvO2FwL2L5ebyZQffgVvA3Mm9NOfldcpgysl+Hv/e5hTGO1tSxbPO+Dn9vEWmfj7fto+JgNReP79iNvqws46JxA5m/tiJjRutU8J+gvt55c20FM4sK2jVMQ3POGd2P7CzLqF0+EYl7Y3U52VnGV8f27/D3vmT8AA4fr/vswtB0U/CfYOX2/VQerObCcR3/wQOc1DWXs0b04dVVmX0/TpGocXdeX13OlJF96d09r8Pff+roGD275PBq6a4Of++2UPCf4MUVO8gJ6Ru/wZWnD2ZT5WHW7DwYWhsikpw1Ow9SVnGIyyYMDOX983KyuODU/sxbU54RV/Eq+BPq6p25H+/gvLEFoXzjN7hswkCys4wXV+wIrQ0RSc4Ly7eTk2Vcefrg0Nq44vTB7D1SkxGDtin4E97/ZDflB6q5ZuKQUNvpl9+FqaP78eKKneruEckAdfXOnOXbOW9sf/r0CG+j77yxBfTrkcezS7eF1kZQCv6EOR/toEdeNhee2vGncTZ21RmD2bLnCMu26OYsIum2ZFN8o++6SeFu9OVmZ3HtpCHMW1PO3sPHQ22rNQp+4OCxGl5auZNLJgykW17Hnb/bnCtOG0TPLjn8x5ItobclIi174v3N9OqWywWnhndsr8ENZw6lpi6+h5FOCn7g2aXbOFRdy61fGZGS9np0yeH6M4fy0oqdGqNfJI227T3Cq6W7uPHsYR160VZzTh10EmcM7cVj722mrj59Xb2RD/76eufRdz9l8vDe7brbTrJuPmc4x+vqebpka8raFJHPe/y9zZhZyjb6AO6cMYpPqg7zxurylLXZWOSDf96acjbvPsK3po1Mabtj+vdk+pgYjyz6lCPHa1PatojEb6j+5AdbuHT8wA4dkLE1l44fyPC+3Xlo4ca0neAR6eCvravnn19bx8hYDy4N6fzdlvzgoiKqDlXz74s/TXnbIlH3q/kbOFhdy93nj0lpuznZWdw5YxQfbdnHW2vTM35PpIP/yQ+3UlZxiPsuO4Xcdtxbs63OPLkPF57an98t2MieNB/lF4mSLbuP8Ni7m/namUM5ddBJKW//xrOGMaZ/Pv/w4uq0jN8T2eDftf8YD76+jrNH9uXiEEbiDOqvLz2FYzV1/PiFlTqvXyQF6uqdv3l+BdlZxr0XjU1LDbnZWTxw1Xg27z7Cb9/emPL2Ixn8tXX1/M+nPqK6tp6fXncaZu2/r25bFQ3oyb0XjeXllbsy4sIOkS+7375dxuKy3fz91eMZ2Ktr2uqYXhjjuklD+Ne3NqT8at7IBX9dvfO3z5fywSd7+Ml1ExjTPz/dJTFrxiimjOzL/c+vZP66zBmzW+TL5rml23jwjfVcM3EwXysemu5y+Ml1Eyjs35PvPfkRq3bsT1m7kQr+g8dq+P5TH/F0yVa+d/4YrpuU/g8e4jdjn31LMUUDenLX40t5duk2dfuIdKD6euehBRv5q2c/ZuroGD/7s9PTuqffoHteDr
NvPZMeedn8+UNLeDtFG35pCX4zu9TM1plZmZndF3Z71bV1PLd0G5f8ciEvr9zJfZedwg8vTk/fXnN6dc/l8TumMHFYb/7qmY+58w9LWbktdVsAIl9G7s6iDVV8/aH3+D+vrOXicQP4t9uKU3KFflAn9+vBc9+ZypDe3bj93z/knqc+YkN5uKP35oT67k0ws2zg18BFwDbgQzOb6+6rO7qteavLmfPxDt7ZUMm+IzWcMrAnv/rmZCYP79PRTXWIvj3yePLOc3j4nU38en4ZV/2qnML++ZxbWMD4wScxqHdXYvld6Nsjj2652WRnGTlZRnaWZcTWi6ROU3uETe0kNrff2OTrm1yuqfds+l2D7qQ2t1xT7xt0ndydo8frqDp0nD2Hj7NlzxFKd+xn4fpKtu09Siw/j3+54XRuOHNoRv6tDOrVjTl3T+M388v43cJNvLB8B6cN6cXUMf248azhjIz16ND2Uh78wNlAmbtvAjCzp4BrgA4P/g8372HJpt2cP7Y/100ewvQxsYz80E+UnWX85czRfGPKcJ5buo3XV5XzxPubqa5teQzvhi+ArBSuXzr/KdsbSE0uGVIgBW1KvXsdK79LDl8Z3Y97LiziytMHpWRIhvbompvNvReP5fZpI3l26VbmrangkUWfcP7Y/h0e/JbqvmQzuwG41N3/IvH8FmCKu9/daLlZwKzE07HAupQWmpwYkBn3VGs/rUtm0rpkpkxfl5PdvaDxxHRs8Qfi7rOB2emuIwgzK3H34nTX0RG0LplJ65KZOuu6pOPg7nZg2AnPhyamiYhICqQj+D8ECs1spJnlATcCc9NQh4hIJKW8q8fda83sbuA1IBt4xN1XpbqODtYpuqQC0rpkJq1LZuqU65Lyg7siIpJekbpyV0REFPwiIpGj4E9Ca0NNmFkXM3s6Mf99MxuRhjIDCbAu95rZajNbYWZvmtnJ6agziKBDgJjZ9WbmZpaxp98FWRcz+3ris1llZv+Z6hqDCvB/bLiZzTezjxL/zy5PR52tMbNHzKzCzEqbmW9m9v8S67nCzCanusakubt+AvwQPxC9ERgF5AEfA+MaLfMd4HeJxzcCT6e77nasy1eB7onH3+7M65JYriewEFgCFKe77nZ8LoXAR0CfxPP+6a67HesyG/h24vE44NN0193MuswAJgOlzcy/HHgFMOAc4P1019zaj7b4g/tsqAl3Pw40DDVxomuAxxKPnwUusMwcI6LVdXH3+e5+JPF0CfHrLTJRkM8F4B+BfwKOpbK4JAVZlzuBX7v7XgB3z9RxvIOsiwMNt7/qBexIYX2BuftCYE8Li1wD/MHjlgC9zWxQaqprGwV/cEOArSc835aY1uQy7l4L7Af6paS65ARZlxPdQXyLJhO1ui6JXe9h7v5SKgtrgyCfSxFQZGaLzWyJmV2asuqSE2RdHgBuNrNtwMvA91JTWodL9u8p7TJ2yAbJDGZ2M1AMzEx3LW1hZlnAg8DtaS6lo+QQ7+45j/he2EIzO83d96WzqDa6CXjU3X9hZl8BHjezCe7e8oiE0m7a4g8uyFATny1jZjnEd193p6S65AQaNsPMLgT+Frja3atTVFuyWluXnsAE4G0z+5R4H+zcDD3AG+Rz2QbMdfcad/8EWE/8iyDTBFmXO4D/AnD394CuxAc962w63TA0Cv7gggw1MRe4LfH4BuAtTxz9yTCtrouZTQIeIh76mdqPDK2si7vvd/eYu49w9xHEj1dc7e4l6Sm3RUH+j71AfGsfM4sR7/rZlMIagwqyLluACwDM7FTiwZ/am892jLnArYmze84B9rv7znQX1RJ19QTkzQw1YWb/AJS4+1zg98R3V8uIHwy6MX0VNy/guvwLkA88kzg+vcXdr05b0c0IuC6dQsB1eQ242MxWA3XAj9w94/YqA67LD4GHzewHxA/03p6JG0pm9iTxL9tY4njE3wG5AO7+O+LHJy4HyoAjwLfSU2lwGrJBRCRi1NUjIhIxCn4RkYhR8I
uIRIyCX0QkYhT8IiIRo+CXyDOz3mb2nRS0c62ZjQu7HZHWKPhFoDfxkVUDSVyo05a/nWuJj0IpklY6j18iz8waRo5cB8wHTgf6EL9I58fuPidxb4XXgPeBM4lfsHMrcDPxq023Akvd/edmNhr4NVBA/IKeO4G+wIvEB+7bD1zv7htTtY4iJ9KVuyJwHzDB3Scmxljq7u4HEkMiLDGzhqt/C4Hb3H2JmZ0FXA+cQfwLYhmwNLHcbOAv3X2DmU0BfuPu5yfe50V3fzaVKyfSmIJf5PMM+KmZzQDqiQ+vOyAxb3NivHWAacAcdz8GHDOz/wYws3xgKn8a6gKgS6qKFwlCwS/yed8k3kVzprvXJEb07JqYdzjA67OAfe4+MZzyRNpPB3dF4CDx4ZshPpR2RSL0vwo0d6/hxcBVZtY1sZV/JYC7HwA+MbOvwWcHgs9ooh2RtFHwS+QlRrdcnLiZ9kSg2MxWEj94u7aZ13xIfDjeFcTvTraS+EFbiO813GFmHwOr+NMtB58CfpS4ufjokFZHpFU6q0ekjcws390PmVl34jdyn+Xuy9Jdl0hr1Mcv0nazExdkdQUeU+hLZ6EtfhGRiFEfv4hIxCj4RUQiRsEvIhIxCn4RkYhR8IuIRMz/B6Y77Ni1Z8iBAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.histplot(train['target'], kde=True, stat='density')" + ] + }, + { + "cell_type": "markdown", + "id": "89084ac0", + "metadata": { + "papermill": { + "duration": 0.038491, + "end_time": "2022-05-11T11:03:26.971134", + "exception": false, + "start_time": "2022-05-11T11:03:26.932643", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Define target" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fb918316", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:27.049570Z", + "iopub.status.busy": "2022-05-11T11:03:27.049053Z", + "iopub.status.idle": "2022-05-11T11:03:27.055599Z", + "shell.execute_reply": "2022-05-11T11:03:27.054810Z" + }, + "papermill": { + "duration": 0.049053, + "end_time": "2022-05-11T11:03:27.057883", + "exception": false, + "start_time": "2022-05-11T11:03:27.008830", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 1\n", + " ..\n", + "899995 1\n", + "899996 0\n", + "899997 0\n", + "899998 0\n", + "899999 1\n", + "Name: target, Length: 900000, dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target = train['target']\n", + "target" + ] + }, + { + "cell_type": "markdown", + "id": "d388de97", + "metadata": { + "papermill": { + "duration": 0.037479, + "end_time": "2022-05-11T11:03:27.134251", + "exception": false, + "start_time": "2022-05-11T11:03:27.096772", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Combine train and test" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "06fe1a2d", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:27.213073Z", + "iopub.status.busy": "2022-05-11T11:03:27.212416Z", + "iopub.status.idle": "2022-05-11T11:03:28.484296Z", + 
"shell.execute_reply": "2022-05-11T11:03:28.483417Z" + }, + "papermill": { + "duration": 1.313462, + "end_time": "2022-05-11T11:03:28.486506", + "exception": false, + "start_time": "2022-05-11T11:03:27.173044", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
f_00f_01f_02f_03f_04f_05f_06f_07f_08f_09...f_20f_21f_22f_23f_24f_25f_26f_28f_29f_30
0-1.3732460.238887-0.2433760.567405-0.6477150.8393260.113133151...-0.9197173.058541-2.5407390.766952-2.730628-0.2081771.36340267.60915300
11.697021-1.710322-2.230332-0.5456611.113173-1.5521750.447825134...-1.0754342.1790502.278315-0.633658-1.217077-3.782194-0.058316377.09641500
21.6817260.616746-1.0276890.810492-0.6090860.113965-0.708660102...-3.485342-0.784235-1.385775-0.520558-0.0091212.788536-3.703488-195.59970202
3-0.118172-0.587835-0.8046382.0868220.371005-0.128831-0.282575321...-2.100177-2.3438190.572594-1.6532131.686035-2.533098-0.608601210.82620500
41.148481-0.176567-0.664871-1.1013430.4678750.5001170.407515330...0.6050331.133665-3.912929-1.4303662.127649-3.3067844.371371-217.21179801
..................................................................
6999950.6401100.897808-0.5239561.563760-0.092281-0.6108670.535426016...-2.1851902.6040481.1228670.5181101.2438370.5751110.076372204.18653900
699996-0.191771-0.035246-0.1185330.5847502.1269770.568659-0.052663434...-0.2395523.0298571.384682-1.1357402.982713-1.5117602.225218-97.69459102
699997-0.331704-0.328845-1.1855031.022128-0.483099-0.107146-0.968281112...-0.9226264.021273-1.8452661.096011-2.734508-4.885955-2.248739130.62274510
699998-2.031073-1.2383980.964699-1.0459500.9060640.634301-0.707474511...-3.0799961.453864-1.6966061.0189951.973697-0.353068-3.333449-364.62514800
699999-0.085906-0.0021242.2273750.2171453.179153-1.6601880.891989034...-2.128546-3.549082-4.325318-5.0172210.251268-3.236026-0.362070-155.41734201
\n", + "

1600000 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " f_00 f_01 f_02 f_03 f_04 f_05 f_06 \\\n", + "0 -1.373246 0.238887 -0.243376 0.567405 -0.647715 0.839326 0.113133 \n", + "1 1.697021 -1.710322 -2.230332 -0.545661 1.113173 -1.552175 0.447825 \n", + "2 1.681726 0.616746 -1.027689 0.810492 -0.609086 0.113965 -0.708660 \n", + "3 -0.118172 -0.587835 -0.804638 2.086822 0.371005 -0.128831 -0.282575 \n", + "4 1.148481 -0.176567 -0.664871 -1.101343 0.467875 0.500117 0.407515 \n", + "... ... ... ... ... ... ... ... \n", + "699995 0.640110 0.897808 -0.523956 1.563760 -0.092281 -0.610867 0.535426 \n", + "699996 -0.191771 -0.035246 -0.118533 0.584750 2.126977 0.568659 -0.052663 \n", + "699997 -0.331704 -0.328845 -1.185503 1.022128 -0.483099 -0.107146 -0.968281 \n", + "699998 -2.031073 -1.238398 0.964699 -1.045950 0.906064 0.634301 -0.707474 \n", + "699999 -0.085906 -0.002124 2.227375 0.217145 3.179153 -1.660188 0.891989 \n", + "\n", + " f_07 f_08 f_09 ... f_20 f_21 f_22 f_23 \\\n", + "0 1 5 1 ... -0.919717 3.058541 -2.540739 0.766952 \n", + "1 1 3 4 ... -1.075434 2.179050 2.278315 -0.633658 \n", + "2 1 0 2 ... -3.485342 -0.784235 -1.385775 -0.520558 \n", + "3 3 2 1 ... -2.100177 -2.343819 0.572594 -1.653213 \n", + "4 3 3 0 ... 0.605033 1.133665 -3.912929 -1.430366 \n", + "... ... ... ... ... ... ... ... ... \n", + "699995 0 1 6 ... -2.185190 2.604048 1.122867 0.518110 \n", + "699996 4 3 4 ... -0.239552 3.029857 1.384682 -1.135740 \n", + "699997 1 1 2 ... -0.922626 4.021273 -1.845266 1.096011 \n", + "699998 5 1 1 ... -3.079996 1.453864 -1.696606 1.018995 \n", + "699999 0 3 4 ... -2.128546 -3.549082 -4.325318 -5.017221 \n", + "\n", + " f_24 f_25 f_26 f_28 f_29 f_30 \n", + "0 -2.730628 -0.208177 1.363402 67.609153 0 0 \n", + "1 -1.217077 -3.782194 -0.058316 377.096415 0 0 \n", + "2 -0.009121 2.788536 -3.703488 -195.599702 0 2 \n", + "3 1.686035 -2.533098 -0.608601 210.826205 0 0 \n", + "4 2.127649 -3.306784 4.371371 -217.211798 0 1 \n", + "... ... ... ... ... ... ... 
\n", + "699995 1.243837 0.575111 0.076372 204.186539 0 0 \n", + "699996 2.982713 -1.511760 2.225218 -97.694591 0 2 \n", + "699997 -2.734508 -4.885955 -2.248739 130.622745 1 0 \n", + "699998 1.973697 -0.353068 -3.333449 -364.625148 0 0 \n", + "699999 0.251268 -3.236026 -0.362070 -155.417342 0 1 \n", + "\n", + "[1600000 rows x 30 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combi = pd.concat([train.drop(['target'], axis=1), test])\n", + "combi = combi.drop(['id', 'f_27'], axis=1)\n", + "combi" + ] + }, + { + "cell_type": "markdown", + "id": "aa697977", + "metadata": { + "papermill": { + "duration": 0.039281, + "end_time": "2022-05-11T11:03:28.565424", + "exception": false, + "start_time": "2022-05-11T11:03:28.526143", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Heatmap" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "0e948e76", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:28.645338Z", + "iopub.status.busy": "2022-05-11T11:03:28.645031Z", + "iopub.status.idle": "2022-05-11T11:03:33.606584Z", + "shell.execute_reply": "2022-05-11T11:03:33.605587Z" + }, + "papermill": { + "duration": 5.004322, + "end_time": "2022-05-11T11:03:33.608988", + "exception": false, + "start_time": "2022-05-11T11:03:28.604666", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAm0AAAIYCAYAAAA/5NvMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABM7klEQVR4nO3de7xcdX3v/9c7CVcDcvECEnATRJHWgGQf8FqNRhtoH0IvPy8V0Rww5iSeBCxt7a/n6Dn66zm1x2rraYWmyraUVNta9eRUouCt1kAoG8RwidBEUgmCyN0IQsJ+//6YFRw3s/ee7O+azMye9zOP9cjM97s+a33W7JnZ3/1d6/tdsk1ERERE9LZZ3U4gIiIiIqaWRltEREREH0ijLSIiIqIPpNEWERER0QfSaIuIiIjoA2m0RURERPSBNNoiIiIi9oCkJZJulbRF0ntb1B8j6euSvi1pk6Qzatlv5mmLiIiIaI+k2cBtwOuA7cC1wFts39K0zhrg27YvknQicLntodJ9p6ctIiIion2nAltsf8/248BngDPHrWPg4Orx04Ef1LHjOXVsJCIiImJAHAXc0fR8O3DauHX+G3CFpP8MPA1YXMeOe67RtvPe7xWdrz174XvqSiUiIqI2b9g5tyh+3T47aspk+v7u37+gbudQ2k5ox77PPO5dwLKmojW21+zBJt4CfMr2n0h6KfA3kn7R9lhJXj3XaIuIiIjopqqBNlEj7U7g6Kbn86qyZucCS6ptXS1pf+AZwD0leeWatoiIiOgfY090fpnctcDxko6VtC/wZmDduHW+D7wWQNILgf2BH5UeehptEREREW2yvQt4N/BlYDPw97ZvlvQBSW+oVvtt4J2SvgN8GniHa5iuo63To5JWAf8JuN72W8fVCfgz4AzgkSqx66u6twP/pVr1/7P916UJR0RExAAruyysnhTsy4HLx5W9r+nxLcDL695vu9e0rQAW297eou504PhqOQ24CDhN0mHA+4FhGkNfr5O0zvYD5WlHREREDJYpT49KuhiYD6yXdEGLVc4ELnXDRuAQSUcCvwxcafv+qqF2JdVFeRERERHTMjbW+aVHTdnTZnu5pCXAItv3tlil1XwlR01SHhERERF7qCcGIkhaJmlU0ugnLv10t9OJiIiIHmWPdXzpVXXM0zbRfCV3Aq8eV/6NVhtong9lb0yaFxEREdFv6uhpWweco4aXAA/ZvovGUNjXSzpU0qHA66uyiIiIiOnJNW1FLqcx3ccWGlN+LAWwfb+kD9KYhA7gA7bvr2F/EREREQOnrUab7aFJ6gysnKDuEuCSaWUWERERMV4PX3PWaT0xECEiIiIiJtf26VFJS4HV44o32G7ZyxYRERFRu6nvDTpjtd1osz0CjHQwl4iIiIjJDfDp0ToGItTq7IXvKYq/7LqPdHX/ERERrcwiM1pFmZ5rtEVERERMqIen5Oi0DESIiIiI6APpaYuIiIi+0cu3meq0tnraJK2StFnS2hZ1kvQxSVskbZJ0SlPdlyQ9KOmf6kw6IiIiYtC029O2Alhse3uLutOB46vlNOCi6n+A/wUcCLyrMM+IiIiIXNM2GUkXA/OB9ZIuaLHKmcClbtgIHCLpSADbXwV+XGfCEREREYNoyp4228slLQEW2b63xSpHAXc0Pd9eld1VT4oRERERlVzTFhERERG9rI7Ro3cCRzc9n1eVtU3SMmAZwMLDTuK4uUM1pBUREREzzgDfxqqOnrZ1wDnVKNKXAA/Z3qNTo7bX2B62PZwGW0RERMRT1dHTdjlwBrAFeARYurtC0r8AJwBzJW0HzrX95Rr2GREREYNogK9pa6vRZntokjoDKyeoe+X00oqIiIiIZrkjQkRERPSPAZ6nre1Gm6SlwOpxxRtst+xli4iIiIj6tN1osz0CjHQwl4iIiIjJDfA1bZmnLSIiIqIPzLhr2s5e+J6i+Muu+0hX9x9RpzFcFD8L1ZRJROw3VvZ5jMoAX9OWnraIiIiIPjD
jetoiIiJi5rJzR4RJSVolabOktS3qJOljkrZI2iTplKr8ZElXS7q5Kn9T3clHRETEgPFY55ce1W5P2wpgse3tLepOB46vltOAi6r/HwHOsf1vkp4DXCfpy7YfLE87IiIiYrBM2WiTdDEwH1gv6RLbHx23ypnApdWdETZKOkTSkbZv272C7R9Iugd4JvBgfelHRETEQBnggQhTNtpsL5e0BFhk+94WqxwF3NH0fHtV9uRN4yWdCuwLbC1LNyIiImIwdXwggqQjgb8B3m738IniiIiI6H0D3JSoY8qPO4Gjm57Pq8qQdDDwReAPbG+caAOSlkkalTS6dce2GlKKiIiImFnqaLStA86pRpG+BHjI9l2S9gU+T+N6t89OtgHba2wP2x4+bu5QDSlFRETEjDT2ROeXHlXH6dHLgTOALTRGjC6tyt8I/BJwuKR3VGXvsH1DDfuMiIiIGChtNdpsD01SZ2Bli/LLgMumnVlERETEeLmmLSIiIiJ6WdunRyUtBVaPK95g+ym9bBEREREdkXnapmZ7BBjpYC4RERERMYHcMD4iIiL6xwBf05ZG2zhnL3xPUfxl132k6zlERETv+YVDHijbwKP71ZNI9K002iIiIqJ/DPA1bRk9GhEREdEH0tMWERER/SM9bZOTtErSZklrW9RJ0sckbZG0SdIpVflzJV0v6QZJN0taXnfyEREREYOi3Z62FcBi29tb1J0OHF8tpwEXVf/fBbzU9mOS5gI3SVpn+wc15B0REREDyO7de4N22pQ9bZIuBuYD6yVd0GKVM2ncFN62NwKHSDrS9uO2H6vW2a+dfUVEREREa1P2tNleLmkJsMj2vS1WOQq4o+n59qrsLklHA18Engf8TnrZIiIiokiuaesM23fYXkCj0fZ2Sc9utZ6kZZJGJY1u3bGtkylFRERE9KU6Gm13Akc3PZ9XlT2p6mG7CXhlqw3YXmN72PbwcXOHakgpIiIiZiSPdX7pUXU02tYB51SjSF8CPGT7LknzJB0AIOlQ4BXArTXsLyIiIgbV2Fjnlx5VxzxtlwNnAFuAR4ClVfkLgT+RZEDAh23fWMP+IiIiIgZOW40220OT1BlY2aL8SmDBtDOLiIiIGK+HT192WqbhiIiIiOgDbZ8elbQUWD2ueIPtp/SyRURERHRED19z1mltN9psjwAjHcwlIiIiIiaQG8bX7OyF7ynexmXXfaTrOQSM4aL4WaimTPo7hwjo/uepdP915PCtHYeXJTB7R1n8TJFr2iIiIiKil6WnLSIiIvrHAF/Tlp62iIiIiD7QVqNN0ipJmyWtbVEnSR+TtEXSJkmnjKs/WNJ2SX9eV9IRERExoHJHhCmtABbb3t6i7nTg+Go5Dbio+n+3DwLfLEkyIiIiYtBN2dMm6WJgPrBe0gUtVjkTuNQNG4FDJB1ZxS4Eng1cUWPOERERMahyw/iJ2V4O/ABYZPujLVY5Crij6fl24ChJs4A/AS6sI9GIiIiIQdbJ0aMrgMttb5cyV1RERETUoIevOeu0OhptdwJHNz2fV5W9FHilpBXAXGBfSTtsv3f8BiQtA5YBLDzsJI6bO1RDWhEREREzRx1TfqwDzqlGkb4EeMj2XbbfavsY20M0TpFe2qrBBmB7je1h28NpsEVERMSEBviatjp62i4HzgC2AI8AS2vYZkREREQ0aavRVvWWTVRnYOUU8Z8CPrUHeUVEREQ81QBf05Y7IkRERET0gbZPj0paCqweV7zB9qS9bBERERG16eFrzjqt7Uab7RFgpIO5REREREwup0cjIiIiopd1cnLdmKazF76nKP6y6z7S1f3PFLPIpNARden256nb+wc4ZdaPi+Kv6IFj6AnpaYuIiIiIXpaetoiIiOgfdrcz6Jq2etokrZK0WdLaFnWS9DFJWyRtknRKU90Tkm6olnV1Jh4RERExSNrtaVsBLLa9vUXd6cDx1XIacFH1P8Cjtk8uTTIiIiICyDVtk5F0MTAfWC/pgharnEnjvqK
2vRE4RNKRNecZERERMdCm7GmzvVzSEmCR7XtbrHIUcEfT8+1V2V3A/pJGgV3AH9n+QnnKERERMbAGuKet0wMRnmv7Tknzga9JutH21g7vMyIiImLGqWPKjzuBo5uez6vKsL37/+8B3wBe3GoDkpZJGpU0unXHthpSioiIiBnJY51felQdjbZ1wDnVKNKXAA/ZvkvSoZL2A5D0DODlwC2tNmB7je1h28PHzR2qIaWIiIiIzpC0RNKt1cwZ751gnTdKukXSzZL+to791nF69HLgDGAL8AiwtCp/IfCXksZoNA7/yHbLRltEREREW7p8TZuk2cBfAK+jcR3/tZLWNbdxJB0P/D7wctsPSHpWHftuq9Fme2iSOgMrW5RfBbxo2plFRERE9J5TgS3VpV9I+gyNmTSaO6beCfyF7QcAbN9Tx45zG6uIiIjoH3bnl8lNNGtGs+cDz5e0QdLGahaOYm2fHpW0FFg9rniD7af0skVERET0K0nLgGVNRWtsr9mDTcyhcdOBV9MYoPlNSS+y/WBJXm032myPACMlO4uIiIgosheuaasaaBM10iacNaPJduAa2zuB2yXdRqMRd21JXjk9GhEREdG+a4HjJR0raV/gzTRm0mj2BRq9bLtn0Hg+8L3SHXd6ct3ogrMXvqco/rLrPtL1HCIiZprRsYPKNjB7Rz2J9Lsujx61vUvSu4EvA7OBS2zfLOkDwKjtdVXd6yXdAjwB/I7t+0r3nUZbRERExB6wfTmNKc+ay97X9NjAe6qlNmm0RURERP/o4TsWdFoabREREdE3PDbllBwzVlsDESStkrRZ0toWdZL0sepWDpskndJUd4ykK6rYWyQN1Zh7RERExMBot6dtBbDY9vYWdafTGMZ6PHAacFH1P8ClwB/avlLSXGBw+zQjIiKiXJcHInTTlD1tki4G5gPrJV3QYpUzgUvdsBE4RNKRkk4E5ti+EsD2DtuP1Jl8RERExKCYsqfN9vLq9guLbN/bYpWJbucwD3hQ0ueAY4GvAO+1/UR52hERETGQBnggQicn150DvBK4EPgPNHrr3tFqRUnLJI1KGt26Y1sHU4qIiIjoT3U02ia6ncN24Abb37O9i8bswKc8Nbxxuwjbw7aHj5s7VENKERERMSONufNLj6qj0bYOOKcaRfoS4CHbd9G4zcMhkp5Zrfca4JYa9hcRERExcOqYp+1y4AxgC/AIsBTA9hOSLgS+KknAdcBf1bC/iIiIGFQDPHq0rUab7aFJ6gysnKDuSmDBtDKLiIiIiCfljggRERHRP9LTNjVJS4HV44o32G7ZyxYRERER9Wm70WZ7BBjpYC4RERERk3Pvju7stJwejac4e+F7irdx2XUfKYr/44X/tSj+Jn5SFA+wZef9RfH3P76jKP4tc08oigf4ceGd476zq9V82u1bMOfwongAoaL4L/y4bND6aw46vih+h3cVxddhVuFrWE8OZXrhhNgYZY2Fr8wu+064b+zRoniAw2cdULyN6J402iIiIqJ/DPA1bZ28I0JERERE1CQ9bREREdE/eviOBZ3WVk+bpFWSNkta26JOkj4maYukTZJOqcoXSbqhafmppLNqzj8iIiJiILTb07YCWGx7e4u604Hjq+U04CLgNNtfB04GkHQYjTsmXFGacERERAww55q2CUm6GJgPrJd0QYtVzgQudcNGGvcbPXLcOr8JrLf9SHHGEREREQNoyp4228slLQEW2W41/v8o4I6m59ursruayt4MlM0BERERETHA17R1fCBC1ev2IuDLk6yzDFgGsPCwkzhu7lCn04qIiIg+5Ez5UeRO4Oim5/Oqst3eCHze9s6JNmB7je1h28NpsEVEREQ8VR2NtnXAOdUo0pcAD9luPjX6FuDTNewnIiIiBt2YO7/0qDpOj14OnEFjdOgjwNLdFZKGaPTC/XMN+4mIiIgYWG012mwPTVJnYOUEddtoDEqIiIiIKJcpPyIiIiKil7V9elTSUmD1uOINtlv2skV
ERETUroevOeu0thtttkeAkQ7mEhERERETyA3jIyIion8M8DxtabRFR/zxwv9aFP+7132wKP41J72zKB5AqCh+7pz9i+K3+adF8QAHaHZR/L4q+4q4148XxQMcon2K4o8/cPxd9fZw/5Tt/8dMOEVl20rfi6UXL++ijtNRM+EYuutFsw8t3sYPavheie5Joy0iIiL6xwBf05bRoxERERF9oK1Gm6RVkjZLWtuiTpI+JmmLpE2STmmq+2NJN1exH5NU1j8eERERg81jnV96VLunR1cAi21vb1F3OnB8tZwGXAScJullwMuBBdV63wJeBXyjJOGIiIiIQTRlT5uki4H5wHpJF7RY5UzgUjdsBA6RdCRgYH9gX2A/YB/gh7VlHhEREYMn9x6dmO3lkpYAi2zf22KVo4A7mp5vB46yfbWkrwN30Rg29Oe2N9eRdERERMSg6djoUUnPA14IzKuKrpT0Stv/0ql9RkRExMzmAZ6nrY7Ro3cCRzc9n1eV/Rqw0fYO2zuA9cBLW21A0jJJo5JGt+7YVkNKERERETNLHY22dcA51SjSlwAP2b4L+D7wKklzJO1DYxBCy9OjttfYHrY9fNzcoRpSioiIiBkp17QVuRw4A9gCPAIsrco/C7wGuJHGoIQv2f6/NewvIiIiYuC01WizPTRJnYGVLcqfAN417cwiIiIixuvhnrBOyx0RIiIiIvpA26dHJS0FVo8r3mD7Kb1sERERER3Rw3cs6LS2G222R4CRDuYSERERMbmcHo2IiIiIXtaxyXVjsN3ET4riX3PSO4viv/advyqKB1hy8vKi+Oftc1hR/AGaXRQPcPsTDxfFD80+qCj+x95VFA9w59gjRfHPnHVAUfymXfcXxT999v5F8QDl74Qys1DxNmbXsI0y5b0zLt5G2WvwtZ9+v3D/cMJ+zyreRrc5PW0RERER0cvS0xYRERH9Iz1tEREREdHL2mq0SVolabOktS3qJOljkrZI2iTplKa6D0m6qVreVGfiERERMYDGxjq/9Kh2T4+uABbb3t6i7nTg+Go5DbgIOE3SrwCnACcD+wHfkLTedtmV0REREREDaMqeNkkXA/OB9ZIuaLHKmcClbtgIHCLpSOBE4Ju2d9n+CbAJWFJj7hERETFoBviG8VM22mwvB34ALLL90RarHAXc0fR8e1X2HWCJpAMlPQNYBBxdnnJERETE4OnY6FHbV0j6D8BVwI+Aq4EnWq0raRmwDGDhYSdx3NyhTqUVERER/ayHe8I6rY7Ro3fy8z1o86oybP+h7ZNtv47GrIK3tdqA7TW2h20Pp8EWERER8VR1NNrWAedUo0hfAjxk+y5JsyUdDiBpAbAAuKKG/UVERMSAst3xpVfVcXr0cuAMYAvwCLC0Kt8H+BdJAA8DZ9s13NMmIiIiYgC11WizPTRJnYGVLcp/SmMEaUREREQ9ck1bRERERPSytk+PSloKrB5XvMH2U3rZIiIiIjpigHva2m602R4BRjqYS0RERERMoGPztMX0jVH2V8QsVFMm07dl5/1F8So8hiUnLy+KB/jSDRcXxf/li99XFP9F7iuKBxiafVBR/CNuObVi256g/B5+jxaOXzpI+xbFL5hzWFH83f5pUXwddvXAd8rOGt4LJeo4htJtlL4Cb9lvfuEW4Dp+XLyNbvMA97TlmraIiIiIPpCetoiIiOgfA9zTlkZbRERE9I/unmnvqrZOj0paJWmzpLUt6k6QdLWkxyRdOK5uiaRbJW2R9N66ko6IiIgYNO32tK0AFtve3qLufmAVcFZzoaTZwF8ArwO2A9dKWmf7lumnGxEREYMsAxEmIeliYD6wXtIF4+tt32P7WmDnuKpTgS22v2f7ceAzwJk15BwRERExcKbsabO9XNISYJHte/dg20cBdzQ93w6ctof5RURERPxMetq6S9IySaOSRrfu2NbtdCIiIiJ6TicbbXcCRzc9n1eVPYXtNbaHbQ8fN3eogylFREREXxvbC0uP6mSj7Vr
geEnHStoXeDOwroP7i4iIiJixiudpk3QEMAocDIxJOh840fbDkt4NfBmYDVxi++bS/UVERMTgGuTRo2012mwPTVJ3N41Tn63qLgcun1ZmEREREfGk3BEhIiIi+kcPX3PWaW032iQtBVaPK95ge2W9KUVERETEeG032myPACMdzCUiIiJiUoN8TVtPzNMWEREREZPLNW09aBbqdgrF7n98R1H83Dn7F8U/b5/DiuIB/vLF7yuKf9e3P1AU/+VTVhXFAzzox4vij9ABRfE377q/KB5gx9hjRfEnzz60KL7/P40zgynrXZkR36t6onwjM6GTaoCvaUtPW0REREQfSE9bRERE9A2np21iklZJ2ixpbYu6EyRdLekxSReOq7tE0j2Sbqoz4YiIiIhB1E5P2wpgse3tLeruB1YBZ7Wo+xTw58Cl000uIiIi4uekp601SRcD84H1ki4YX2/7HtvXAjtb1H2TRqMuIiIiIgpN2mizvRz4AbDI9kf3TkoRERERrXms88tUJC2RdKukLZLeO8l6vyHJkobrOPYMRIiIiIj+0eXTo5JmA38BvA7YDlwraZ3tW8atdxCNO0ldU9e+e2LKD0nLJI1KGt26Y1u304mIiIiYyKnAFtvfs/048BngzBbrfRD4EPDTunbcE40222tsD9sePm7uULfTiYiIiB7VA6dHjwLuaHq+vSp7kqRTgKNtf7HOYy9qtEk6QtJ24D3Af5G0XdLBVd2ngauBF1Tl55anGxEREdFZzWcAq2XZHsTOAj4C/HbdeU15TZvtoUnq7gbmTVD3lumnFREREfFUe2NyXdtrgDUTVN8JHN30fF5VtttBwC8C35AEcASwTtIbbI+W5NUTp0cjIiIi+sS1wPGSjpW0L/BmYN3uStsP2X6G7aGq42sjUNxggzZHj0paSmMERLMNtleWJhARERHRrm7fxsr2LknvBr4MzAYusX2zpA8Ao7bXTb6F6Wur0WZ7BBjpVBIRERER/cL25cDl48reN8G6r65rv5mnLSIiIvqH1e0MuiaNtuiIt8w9oSh+m8umtTlAs4viAb7IfUXxXz5lVVH8567/WFE8wNKFFxbF3+1Hi+IPnX1AUXwd2/jOEw8WxR80a5+i+Nk9cOnwLLr/S25OYQ5jxfH97/JHby/exgv3f3YNmUS3pNEWERERfaPb17R1U/f/BIyIiIiIKaWnLSIiIvqGx7p/ur9bpuxpk7RK0mZJa1vUnSDpakmPSbqwqfxoSV+XdIukmyWNny4kIiIiIvZAOz1tK4DFtre3qLsfWAWcNa58F/Dbtq+v7nJ/naQrbd9SlG1EREQMtFzTNgFJFwPzgfWSLhhfb/se29cCO8eV32X7+urxj4HNjLuZakRERES0b9KeNtvLJS0BFtm+dzo7kDQEvBi4ZjrxEREREbt5gOdp6+joUUlzgX8Ezrf98CTrLZM0Kml0645tnUwpIiIioi91rNEmaR8aDba1tj832bq219getj183NyhTqUUERERfc5jnV96VUcabZIEfBLYbPsjndhHRERExCApmqdN0hHAKHAwMCbpfOBEYAHwNuBGSTdUq/+/1Q1WIyIiIqZlkOdpm7LRZntokrq7gXktqr4FPXCzu4iIiJhR7G5n0D25jVVEREREH2jr9KikpcD4uxpssL2y/pQiIiIiWsvp0SnYHgFGOpxLREREREwgN4wfZ4zunyyfNQMuB/wxZWOmD9Dsovjbn5hwWsC2Dc0+qCj+QT9eFL904YVTrzSFkes+XBT/poXnF8Ufrv2K4gHu82NF8aXXgJQew8PeOfVKUyidgaD0e62O76RdXf5ureMYSt9LpT/HBfsfWbgFeKIHfseVGuSetlzTFhEREdEH0tMWERERfSOjRyMiIiKip7XVaJO0StJmSWtb1J0g6WpJj0m6sKl8f0n/Kuk7km6W9N/rTDwiIiIGj8fU8aVXtXt6dAWw2Pb2FnX3A6uAs8aVPwa8xvaO6j6k35K03vbGaWcbERE
RMaCm7GmTdDEwH1gv6YLx9bbvsX0tsHNcuW3vqJ7uUy0DfCY6IiIiStnq+NKrpmy02V4O/ABYZPuje7JxSbOre4/eA1xp+5ppZRkREREx4Do6EMH2E7ZPpnF/0lMl/WKr9SQtkzQqaXTrjm2dTCkiIiL6mMc6v/SqvTJ61PaDwNeBJRPUr7E9bHv4uLlDeyOliIiIiL7SsXnaJD0T2Gn7QUkHAK8DPtSp/UVERMTMN9bD15x1WnGjTdIRwChwMDAm6XzgROBI4K8lzabRo/f3tv+pdH8RERERg6jdG8YPTVJ3N41r1sbbBLx4emlFREREPFUvj+7stNwRISIiIqIPtH16VNJSYPW44g22V9abUkRERERrvXzHgk5ru9FmewQY6WAuERERETGBjo0e7VezGNwWfJ2+s+veovh9VfbWHJp9UFE8wCN+oij+CB1QFH+3Hy2KB3jTwvOL4v/uuj8tin/X8O8WxQNse/z+oviX7XdUUfzWJx4uij9k1n5F8dGQ72a4b+ynxds4eNa+NWTSXR7geyvlmraIiIiIPpCetoiIiOgbuaYtIiIiog8M8uS6bZ0elbRK0mZJa1vUnSDpakmPSbqwRf1sSd+WlIl1IyIiIqap3Z62FcBi29tb1N0PrALOmiB2NbCZxh0TIiIiIqYtk+tOQtLFwHxgvaQLxtfbvsf2tcDOFrHzgF8BPlFDrhEREREDa8qeNtvLJS0BFtne03kc/hT4XaB8/oWIiIgYeJnyowMk/Spwj+3rOrWPiIiIiEHRyXnaXg68QdI24DPAayRd1mpFScskjUoa3bpjWwdTioiIiH42ZnV86VUda7TZ/n3b82wPAW8Gvmb77AnWXWN72PbwcXOHOpVSRERERN8qnqdN0hHAKI3RoWOSzgdOtF1275eIiIiIcQZ59Ghbjbaqt2yiuruBeVPEfwP4xh7kFRERERFNckeEiIiI6BuDPHq07UabpKU0JspttsH2ynpTioiIiIjx2m602R4BRjqYS0RERMSkenl0Z6d1csqPiIiIiKhJrmmLjlgw5/Ci+Hv9eFH8j72rKB7gCcaK4m/edX9R/KGzDyiKBzhc+xXFv2v4d4vi/3L0j4viAc485d1F8VufKBvIvr/KviZ30f0LcOZQ1jNR9kmoJ4dSdfwcSl+HWYWvwSGzyj7PM8Ugjx5NT1tEREREH0hPW0RERPSNXNM2CUmrJG2WtLZF3QmSrpb0mKQLx9Vtk3SjpBskjdaZdERERMSgaaenbQWw2Pb2FnX3A6uAsyaIXWT73mnmFhEREfFzun+VaPdM2tMm6WJgPrBe0gXj623fY/taYGeH8ouIiIgIpuhps71c0hKm12Nm4ApJBv7S9prpJhkREREBg31NWycHIrzC9p2SngVcKem7tr/Zwf1FRETEDJcpPzrA9p3V//cAnwdOnWhdScskjUoa3bpjW6dSioiIiOhbHWm0SXqapIN2PwZeD9w00fq219getj183NyhTqQUERERM8DYXlh6VdHpUUlHAKPAwcCYpPOBE4FnAJ+XtHsff2v7S2WpRkRERAyuKRtttocmqbsbmNei6mHgpOmnFREREfFU7vIt0bopt7GKiIiI6ANtnR6VtBRYPa54g+2V9acUERER0drYAM+u21ajzfYIMNLhXCIiIiJiArlhfERERPSNsQG+pi2NtugIFX6oDtE+RfF3jj1SFA/wqHcVxe8Ye6wo/tDZBxTFA9znshy2PX5/UfyZp7y7KB7g/1z/50XxL1+wtCj+ufscUhQ/p4ZfMKVTEPTCFAa7unzHyFk1/BxKLwIv/Tk8TwcWbgG2uPy7MbonjbaIiIjoGxk9GhERERE9LT1tERER0Td64XR/t0zZ0yZplaTNkta2qDtB0tWSHpN04bi6QyR9VtJ3q/iX1pl4RERExCBpp6dtBbDY9vYWdfcDq4CzWtT9GfAl278paV+g/ArKiIiIGGi5pm0Cki4G5gPrJV0wvt72PbavBXaOi3s68Ev
AJ6v1Hrf9YF1JR0RERAyaSXvabC+XtARYZPvePdjuscCPgBFJJwHXAatt/2T6qUZERMSgyzVt9ZsDnAJcZPvFwE+A9060sqRlkkYljW7dsa1DKUVERET0r0412rYD221fUz3/LI1GXEu219getj183NyhDqUUERER/W5sLyy9qiONNtt3A3dIekFV9Frglk7sKyIiImIQFM3TJukIYBQ4GBiTdD5wou2Hgf8MrK1Gjn4PKLuXTERERAy8QR49OmWjzfbQJHV3A/MmqLsBGJ5uYhERERHjjQ1umy23sYqIiIjoB22dHpW0FFg9rniD7ZX1pxQRERHR2lhOj07O9ggw0uFcIiIiImICuWF8dMQXflw2WPj4A48sin/mrAOK4gEO0r5F8SfPPrQo/jtPPFgUD+XXP7xsv6OK4rc+8XBhBvDyBWVjmDZsKvt789dPWVUUf6DyNQswawb0jnR7Koiv7byreBvHzHl6DZl0l7udQBflmraIiIiIPpA/ASMiIqJvdLvHs5vS0xYRERHRB6ZstElaJWmzpLUt6k6QdLWkxyRd2FT+Akk3NC0PVxPvRkREREzbmNTxZSqSlki6VdIWSU+5t7qk90i6RdImSV+V9Nw6jr2d06MrgMW2t7eoux9YBZzVXGj7VuBkAEmzgTuBz5ckGhEREdFtVbvmL4DX0bjX+rWS1tluHoH3bWDY9iOS/hPwx8CbSvc9aU+bpIuB+cB6SReMr7d9j+1rgZ2TbOa1wFbb/16UaURERAw874VlCqcCW2x/z/bjwGeAM38uR/vrth+pnm5kgrtH7alJe9psL5e0BFhk+95p7uPNwKenGRsRERHRS44C7mh6vh04bZL1zwXW17Hjjo4erW4W/wbg9zu5n4iIiBgMe2P0qKRlwLKmojW210xjO2fTuA/7q+rIq9NTfpwOXG/7h5Ot1PziLDzsJI6bO9ThtCIiIiJaqxpoEzXS7gSObno+ryr7OZIWA38AvMr2Y3Xk1ekpP95CG6dGba+xPWx7OA22iIiImMiYOr9M4VrgeEnHVmcU3wysa15B0ouBvwTeYPueuo69qKdN0hHAKHAwMFZN63Gi7YclPY3GyIp3FWcZERER0QNs75L0buDLwGzgEts3S/oAMGp7HfC/gLnAP6gxhcj3bb+hdN9TNtpsD01SdzcTjIiw/RPg8GlnFhERETHOWA/cx9b25cDl48re1/R4cSf2mzsiRERERPSBtk6PSloKrB5XvMH2yvpTioiIiGitjXnUZqy2Gm22R4CRDucSERERERPo9JQfEREREbVpY3TnjJVGW3TEaw46vij+EPYpit+06/6ieIAFcw4rii/9XjloVtlrAHC49iuK3/rEw0Xx+6v8K+a5+xxSFP/rp6wqiv/c9R8riv+thU+5A+BeN6sHLtwuVXoBdh0Tso4Vnpgr/Tm8fdZRRfEAX2dH8Ta6bW9MrturMhAhIiIiog+kpy0iIiL6xiAPRGirp03SKkmbJa1tUXeCpKslPSbpwnF1F0i6WdJNkj4taf+6Eo+IiIgYJO32tK0AFtve3qLufmAVcFZzoaSjqvITbT8q6e9p3OrhU9PONiIiIgbaIA9EmLKnTdLFwHxgvaSnXFFr+x7b1wI7W4TPAQ6QNAc4EPhBYb4RERERA6md21gtl7QEWGT73nY3bPtOSR8Gvg88Clxh+4rppxoRERGDLqNHO0DSocCZwLHAc4CnSTq7U/uLiIiImMk6OeXHYuB22z+yvRP4HPCyVitKWiZpVNLo1h3bOphSRERE9LOxvbD0qk422r4PvETSgZIEvBbY3GpF22tsD9sePm7uUAdTioiIiOhPxfO0SToCGAUOBsYknU9jxOg1kj4LXA/sAr4NrCndX0RERAwuD/Do0XZvGD80Sd3dwLwJ6t4PvH9amUVERETEk3JHhIiIiOgbvXzNWae13WiTtBRYPa54g+2V9aYUEREREeO13WizPQKMdDCXiIiIiEkNck9bJ0ePRkRERER
Nck1bdMQO7yqK/3HLu6K17+mz9y+KB7jbPy3eRonZNfxN9bDLXsdDZu1XFL8LF8UDzKFsqNiBKvua+62FT7l73x752+s+WhQP8MoF/7Eo/sR9n1EUf+vO+4viAfZR2fv5oV2PFsUfs++hRfEAswrfi/tqdlH8V2Y9XBQPcKIOKt5Gt5V/q/Sv9LRFRERE9IH0tEVERETfGBvgedrS0xYRERHRB6ZstElaJWmzpLUt6k6QdLWkxyRdOK5utaSbJN1c3SUhIiIiosgg33u0ndOjK4DFtre3qLsfWAWc1Vwo6ReBdwKnAo8DX5L0T7a3lKUbERERMZgm7WmTdDEwH1gv6SlDqGzfY/taeMpQvxcC19h+xPYu4J+BX68p54iIiBhQg9zTNmmjzfZy4AfAItt7Mm79JuCVkg6XdCBwBnD09NOMiIiIaEz50emlV3VkIILtzcCHgCuALwE3AE9MtL6kZZJGJY1u3bGtEylFRERE9LWOjR61/UnbC23/EvAAcNsk666xPWx7+Li5Q51KKSIiIvrcmDq/9KqOzdMm6Vm275F0DI3r2V7SqX1FREREzHRFjTZJRwCjwMHAWDW1x4m2Hwb+UdLhNAYprLT9YGGuERERMeB6eaBAp03ZaLM9NEnd3cC8CepeOf20IiIiIqJZbmMVERERfaOXR3d2WluNNklLgdXjijfYXll/ShERERExXluNNtsjwEiHc4mIiIiY1NgA97Xl9Gj0JFE25np2TXn0u5lwwW6/H8MrF/zH4m38y6ZLiuLftPD8ovjZhZ9HgDGX/aI9Y//nFsX/y84fFsUDnD7nyKL4G9hRFF/6vQjwSN9/ogZbGm0RERHRNwa52dmxyXUjIiIioj7paYuIiIi+MbhXtLXZ0yZplaTNkta2qHurpE2SbpR0laSTmuqWSLpV0hZJ760z8YiIiIhB0m5P2wpgse3tLepuB15l+wFJpwNrgNMkzQb+AngdsB24VtI627fUkXhEREQMnlzTNglJFwPzgfWSLhhfb/sq2w9UTzfyszsknApssf09248DnwHOrCftiIiIiMHSzm2slktaAiyyfe8Uq58LrK8eHwXc0VS3HThtWllGREREAGPlM5/0rdpGj0paRKPR9nvTiF0maVTS6NYd2+pKKSIiImLGqKXRJmkB8AngTNv3VcV3Akc3rTavKnsK22tsD9sePm7uUB0pRURExAw0hju+9KriRpukY4DPAW+zfVtT1bXA8ZKOlbQv8GZgXen+IiIiIgZRHfO0vQ84HPi4JIBdVa/ZLknvBr5M465Cl9i+uYb9RURExIDq3X6wzmv3hvFDk9SdB5w3Qd3lwOXTyiwiIiJinEz5ERERERE9re3To5KWAqvHFW+wvbLelCIiIiJa6+WBAp3WdqPN9ggw0sFcIiIiImICuWF8RERE9I3B7WdLoy06ZBZlU1b3wsWWu7r81VD6GkL3TyPMqeUYuqv053Divs8ozuFNC88viv+76/60KP7Y57+hKB5g5UEnF8Xfyc6i+J96V1E8wP6lH6fCj0Mdn6fPPHxjUfyHizOIEmm0RURERN/o9h9y3dQLHRoRERERMYUpG22SVknaLGlti7q3Stok6UZJV0k6qanuEkn3SLqp7qQjIiJiMOU2VpNbAbzO9ltb1N0OvMr2i4APAmua6j4FLCnOMCIiIiImb7RJuhiYD6yXdMH4ettX2X6gerqRxk3hd9d9E7i/xlwjIiJiwHkvLL1q0oEItpdLWgIssn3vFNs6F1hfW2YRERER8aRaRo9KWkSj0faKOrYXERER0UpGjxaQtAD4BHCm7fumuY1lkkYljW7dsa00pYiIiIgZp6jRJukY4HPA22zfNt3t2F5je9j28HFzh0pSioiIiBnMe+FfryrtaXsfcDjwcUk3SBrdXSHp08DVwAskbZd0buG+IiIiIgbWlNe02R6apO484LwJ6t4y/bQiIiIinirXtEVERERET2tr9KikpcDqccUbbK+sP6WIiIi
I1nr5jgWd1lajzfYIMNLhXCIiIiJiArXM0xYRERGxNwxuP1sabdGjdhV+LGeh4hzq2Ea3dfsYBvmC4d1u3Vl+N7/ZhT/HY5//hqL4229bVxQP8LIF7yiKnzfn6UXx58w+uige4IvTm4r0SQdrv6L4Oj5PVxz63Bq20l2DfHo0AxEiIiIi+kB62iIiIqJvDHIPfnraIiIiIvrAlI02SaskbZa0tkXdWyVtknSjpKsknVSVHy3p65JukXSzpPHThURERETssUG+jVU7p0dXAIttb29RdzvwKtsPSDodWAOcBuwCftv29ZIOAq6TdKXtW2rLPCIiImKATNrTJuliYD6wXtIF4+ttX2X7gerpRmBeVX6X7eurxz8GNgNH1Zl4REREDJ6xvbD0qkl72mwvl7QEWGT73im2dS6wfnyhpCHgxcA1000yIiIiYtDVMhBB0iIajbbfG1c+F/hH4HzbD08Sv0zSqKTRrTu21ZFSREREzECDfE1bcaNN0gLgE8CZ9s9mHpS0D40G21rbn5tsG7bX2B62PXzc3KHSlCIiIiJmnKJ52iQdA3wOeJvt25rKBXwS2Gz7I2UpRkRERDT08jVnnVba0/Y+4HDg45JukDRalb8ceBvwmqr8BklnFO4rIiIiYmBN2dNme2iSuvOA81qUfwtmwI0bIyIioqeMuXevOeu03BEhIiIiog+0dU2bpKXA+LsabLC9sv6UIiIiIlob3H62NhtttkeAkQ7nEhERERETKBo9Gp0xVvh3xKweuJyw/Lx72THMruE12NnlMUpzajiGXV3+m7QXjqHbn4d9VP5pKL2GZ+VBJxfFv2zBO4riAa7a9Kmi+KULLyyKP//urxfFA5z7nJcVxT/onUXxdXyvLbp3a1H8D4szKFf6O7Kf5Zq2iIiIiD0gaYmkWyVtkfTeFvX7Sfq7qv6a6u5QxdJoi4iIiL7R7TsiSJoN/AVwOnAi8BZJJ45b7VzgAdvPAz4KfKiOY0+jLSIiIqJ9pwJbbH/P9uPAZ4Azx61zJvDX1ePPAq+tbjxQpK1Gm6RVkjZLWtui7q2SNkm6UdJVkk6qyveX9K+SviPpZkn/vTTZiIiIGGxje2GZwlHAHU3Pt1dlLdexvQt4iMbNCIq0OxBhBbDY9vYWdbcDr7L9gKTTgTXAacBjwGts76juQ/otSettbyxNOiIiIgbT3hiIIGkZsKypaI3tNR3f8RSmbLRJuhiYD6yXdIntjzbX276q6elGYF5VbmBHVb5PtQzukI+IiIjoC1UDbaJG2p3A0U3P51VlrdbZLmkO8HTgvtK8pjw9ans58ANg0fgGWwvnAut3P5E0W9INwD3AlbavKcg1IiIiBly3ByIA1wLHSzpW0r7Am4F149ZZB7y9evybwNeqzqwitQ1EkLSIRqPt93aX2X7C9sk0WqGnSvrFCWKXSRqVNLp1x7a6UoqIiIioVXWN2ruBLwObgb+3fbOkD0h6Q7XaJ4HDJW0B3gM8ZVqQ6ahlcl1JC4BPAKfbfkr3n+0HJX0dWALc1KL+yW7INz33rJxCjYiIiJa6O+15g+3LgcvHlb2v6fFPgf+n7v0W97RJOgb4HPA227c1lT9T0iHV4wOA1wHfLd1fRERExCCqo6ftfTSGsX68moJkl+1h4Ejgr6tJ6GbR6D78pxr2FxEREQOqhkvD+la7N4wfmqTuPOC8FuWbgBdPO7OIiIiIeFJuGB8RERF9Y5BvGN92o03SUmD1uOINtlfWm1JEREREjNd2o832CDDSwVwiIiIiJtULo0e7JTeMj4iIiOgDuaatB81C3U6hWOlfQjPhr4k2ZtWe1FgPvA9mwnuxVOl78aFdjxbncMb+zy2Kv5OdRfHz5jy9KB5g6cILi+JHrvtwUfytC95RFA/wEz9RvI1ue+7Tnt3tFIqVfrf2s5nwuzEiIiJixktPW0RERPSNQR492lZPm6RVkjZLWtui7q2SNkm6UdJVkk4aVz9b0rclZWLdiIiIiGlqt6dtBbD
Y9vYWdbcDr7L9gKTTadxD9LSm+tU0bqh6cFGmERERMfAG+Y4IU/a0SboYmA+sl3TB+HrbV9l+oHq6EZjXFDsP+BUaN5OPiIiIiGmasqfN9nJJS4BFtu+dYvVzgfVNz/8U+F3goGlnGBEREVHJPG01kLSIRqPt96rnvwrcY/u6uvYRERERMahqabRJWkDjFOiZtu+ril8OvEHSNuAzwGskXTZB/DJJo5JGt+7YVkdKERERMQN5L/zrVcWNNknHAJ8D3mb7tt3ltn/f9jzbQ8Cbga/ZPrvVNmyvsT1se/i4uUOlKUVERETMOHXM0/Y+4HDg45IAdtkermG7ERERET9nkOdpa6vRVvWWTVR3HnDeFPHfAL6xB3lFREREPEWm/IiIiIiIntb26VFJS2lMlNtsg+2V9aYUERER0VpOj7bB9ggw0sFcIiIiImICuWF8RERE9I1enpKj09Jo60GlXb+zUE2ZdM+u4g9l+Ye69HUsja9j1u9uvxfKf47dP4bSn8Mx+x5anMO/7PxhUfxPvaso/pzZRxfFA5x/99eL4m9d8I6i+Ks2faooHuDtC3+7eBsldtbwrbBC5T/L6J402iIiIqJvjGX0aERERET0svS0RURERN8Y3H62NnraJK2StFnS2hZ1b5W0SdKNkq6SdFJT3baq/AZJo3UnHhERETFI2ulpWwEstr29Rd3twKtsPyDpdGANcFpT/SLb99aQZ0RERMRAz9M2aU+bpIuB+cB6SReMr7d9le0HqqcbgXn1pxgRERERk/a02V4uaQnt9ZidC6xvDgeukGTgL22vKUs1IiIiBt0g97TVMhBB0iIajbZXNBW/wvadkp4FXCnpu7a/OUH8MmAZwMLDTuK4uUN1pBURERExYxRP+SFpAfAJ4Ezb9+0ut31n9f89wOeBUyfahu01todtD6fBFhEREROx3fGlVxU12iQdA3wOeJvt25rKnybpoN2PgdcDN5XsKyIiImKQlZ4efR9wOPBxSQC7bA8DzwY+X5XNAf7W9pcK9xUREREDLte0TcL20CR15wHntSj/HnDSUyMiIiIiYjpyR4SIiIjoG05P2+QkLQVWjyveYHtl/SlFRERExHhtNdpsjwAjHc4lIiIiYlK9PLqz03J6tAfNQt1OoVi3LxSto/t8Jvwciuf0KTRWwzZKj6E0h9L3ch3vo9PnHFkUv3/hx+GLP5vNadrOfc7LiuJ/4ieK4t++8LeL4gH++ro/KYo/e+F7iuLr+F77x9kPFsW/oziDct3+/dJN3f5Oj4iIiIg2pKctIiIi+sYgnx5NT1tEREREH5iy0SZplaTNkta2qHurpE2SbpR0laSTmuoOkfRZSd+t4l9ad/IRERExWMZwx5de1c7p0RXAYtvbW9TdDrzK9gOSTgfWAKdVdX8GfMn2b0raFziwlowjIiIiBtCkPW2SLgbmA+slXTC+3vZVth+onm4E5lVxTwd+Cfhktd7jth+sMe+IiIgYQN4L/3rVpI0228uBHwCLbH90im2dC6yvHh8L/AgYkfRtSZ+obhwfEREREdNQy0AESYtoNNp+ryqaA5wCXGT7xcBPgPdOEr9M0qik0a07ttWRUkRERMxAY3bHl15V3GiTtAD4BHCm/eQMjNuB7bavqZ5/lkYjriXba2wP2x4+bu5QaUoRERERM07RPG2SjgE+B7zN9m27y23fLekOSS+wfSvwWuCWslQjIiJi0PXyNWedVjq57vuAw4GPSwLYZXu4qvvPwNpq5Oj3gKWF+4qIiIgYWFM22mwPTVJ3HnDeBHU3AMOt6iIiIiKmo5evOeu03BEhIiIiog+0dXpU0lJg9bjiDbZX1p9SRERERGu5pm0KtkeAkQ7nEhERERETKB2IEBEREbHXDPI1bT3XaHvDzrlF8bMKu033GyuL/4VDHph6pSl8a8fhRfGnzPpxUfzo2EFF8QBfmb2jeBtlVLyFsRqy6LZuH8OsGfBzKD2GfTW7OIcbKPw8Ff4YDtZ+ZRsAHvT
O4m1029kL31MUf9l1H+nq/gEOVC5l72c912iLiIiImMggX9OWJndEREREH2ir0SZplaTNkta2qHurpE2SbpR0laSTmuoukHSzpJskfVrS/nUmHxEREYMl9x6d2grgdbbf2qLuduBVtl8EfBBYAyDpKGAVMGz7F4HZwJvLU46IiIhB5b3wr1dN2WiTdDEwH1gv6YLx9bavsr376vuNwLym6jnAAZLmAAcCPyhPOSIiImLwtHMbq+WSlgCLbN87xernAuuruDslfRj4PvAocIXtK0oTjoiIiMFld3tMeffUNhBB0iIajbbfq54fCpwJHAs8B3iapLPr2l9ERETEIKml0SZpAfAJ4Ezb91XFi4Hbbf/I9k7gc8DLJohfJmlU0ujXHvm3OlKKiIiIGWgMd3zpVcWNNknH0GiQvc32bU1V3wdeIulASQJeC2xutQ3ba2wP2x5+zYHHl6YUERERMePUMbnu+4DDgY832mbsqhpg10j6LHA9sAv4NtXI0oiIiIjpcA9PydFp7d4wfmiSuvOA8yaoez/w/mllFhERERFPym2sIiIiom/08jVnndZ2o03SUmD1uOINtlfWm1JEREREjNd2o832CDDSwVwiIiIiJjXI17TlhvERERERfaDnrmlbt8+ObqdQ5tH9yrcxu+w1uAJ1df8A9409WhT/otmHFsV/7affL4oHeMt+84vi79cTRfGXP3p7UTzAgv2PLIq/b+ynRfGHzCr/PDxPBxbFf23nXUXxb591VFH8V2Y9XBQPoMLP9JzC+Drmn59d+r1UaGcNR1F6T8qzF76nKP6y6z5SFF9HDr2gl2/o3mnpaYuIiIjoAz3X0xYRERExkdIez342ZU+bpFWSNkta26LurZI2SbpR0lWSTmqqWy3pJkk3Szq/5rwjIiIiBko7PW0rgMW2t7eoux14le0HJJ1O444Hp0n6ReCdwKnA48CXJP2T7S11JR4RERGDJ6NHJyDpYmA+sF7SBePrbV9l+4Hq6UZgXvX4hcA1th+xvQv4Z+DX60s7IiIiYrBM2tNme7mkJcAi2/dOsa1zgfXV45uAP5R0OPAocAYwWppsREREDLbcEaGQpEU0Gm2vALC9WdKHgCuAnwA3AGXzH0REREQMsOIpPyQtAD4BnGn7vt3ltj9pe6HtXwIeAG6bZBvLJI1KGt26Y1tpShERETFD2e740quKGm2SjgE+B7zN9m3j6p7VtM6vA3870XZsr7E9bHv4uLlDJSlFRETEDDZmd3zpVaWnR98HHA58XBLALtvDVd0/Vte07QRW2n6wcF8RERERA2vKRpvtoUnqzgPOm6DuldNPKyIiIuKpevn0ZaflNlYRERERfaCt06OSlgKrxxVvsL2y/pQiIiIiWsuUH1OwPQKMdDiXiIiIiL4m6TDg74AhYBvwxqYbEexe52TgIuBgGlOi/aHtv5tq2zk9GhEREX2jD6b8eC/wVdvHA1+tno/3CHCO7V8AlgB/KumQqTasXrug703PPau3EoqBNRO64GehovjS16B0/zPBC/S04m08wlhR/GcevrEo/opDn1sUD7Do3q1F8c992rOL4lfo6KJ4gH+c/WBR/IGqZT77Ipdd95Gi+H2eMb/rH+qDnza/41/OD//ke9M+Tkm3Aq+2fZekI4Fv2H7BFDHfAX7T9r9Ntl7330ERERERberledQqz7Z9V/X4bmDSvzgknQrsC0z5l00abRERERFNJC0DljUVrbG9pqn+K8ARLUL/oPmJbUuasJVZ9cT9DfB221N2qafRFhEREX3De+HSlaqBtmaS+sUT1Un6oaQjm06P3jPBegcDXwT+wPbGdvJqayCCpFWSNkta26LuTEmbJN1Q3T/0FU11b5f0b9Xy9nb2FREREdHH1gG72zxvB/7P+BUk7Qt8HrjU9mfb3XC7PW0rgMW2t7eo+yqwruoCXAD8PXBCNeT1/cAwYOA6SevGD3uNiIiIaFcfXNP2R8DfSzoX+HfgjQCShoHl1d2k3gj
8EnC4pHdUce+wfcNkG56y0SbpYmA+sF7SJbY/2lxve0fT06fBk/2Wvwxcafv+ajtX0hjW+ump9hkRERHRj2zfB7y2Rfko1a0/bV8GXLan227n3qPLJS0BFtm+t9U6kn4N+J/As4BfqYqPAu5oWm17VRYRERExLb02VdneVMvkurY/b/sE4Czgg3saL2lZdT3c6NYd2+pIKSIiImJGqfWOCLa/CcyX9AzgTqB5NsN5VVmruDW2h20PHzd3qM6UIiIiYgbxXvjXq4obbZKeJ0nV41OA/YD7gC8Dr5d0qKRDgddXZRERERGxh+qYp+03gHMk7QQeBd7kxgnn+yV9ELi2Wu8DuwclREREREzHIF/T1lajzfbQJHUfAj40Qd0lwCXTyiwiIiIinpQ7IkRERETfSE9bGyQtBVaPK95ge2W9KUVERETEeG032myPACMdzCUiIiJiUoPbz0ajm7GfFmBZP8f3Qg45ht7IodvxvZBDjqE3csgx5DWocxtZOrfUOk/bXrKsz+N7IYccQ2/k0O34Xsghx9AbOeQY8hrUuY3okH5stEVEREQMnDTaIiIiIvpAPzba1vR5fC/kkGPojRy6Hd8LOeQYeiOHHENegzq3ER2i6sLDiIiIiOhh/djTFhERETFw0miLiIiI6ANptEVERET0gTTaIiIiIvpAT98wXtLTgSXAUVXRncCXbT9Yw7ZfZ/vKNtY7GHim7a3jyhfY3tRG/BEAtu+W9EzglcCttm+eZupI+h+2/99pxh4LvBi4xfZ321j/GOAe2z+VJOAdwCnALcBf2d7VxjbeAFxh+6fTybnaxi8BP7R9q6SXAy8FNtv+Ypvxc2m8l44GngBuq3Ia24McTgDO5Offj+tsb27/SFpud6kbt4lrZ/9HAdfY3tFUvsT2l9qIPxWw7WslnUjj9fiu7csLcr/U9jnTjH0FcCpwk+0r2lj/NBo/84clHQC8l5+9F/+H7Yfa2MYq4PO275hmzvsCbwZ+YPsrkn4LeBmwGVhje2cb25gP/Do//178W9sP70Eeff1erNat9f3Yh+/Frr+XOvk7NjqjZ0ePSjoHeD9wBY03EsA84HXAf7d9aeH2v2/7mCnWeSPwp8A9wD7AO2xfW9Vdb/uUKeLfRePDLOBDNBo8NwGvAP7Y9ifbyPNj44uAtwGXAtheNUX8F2yfVT0+szqeb9D4cviftj81RfxNwKm2H5H0IeA44AvAa6r9/8c2juFR4CfAeuDTNL4Unpgqrin+T2l8oc4Bvgy8ttrWq4Bv2/6dKeLfCFwIbAIWAVfR6GV+EfBW2ze2kcPvAW8BPgNsr4rn0fjS/YztP2r3eFpsu5334ipgJY0v9JOB1bb/T1XXznvx/cDpNF7DK4HTgK/T+Dx92fYftpHnuvFFNF7PrwHYfsMU8f9q+9Tq8Tur4/k88Hrg/071Gkq6GTjJ9i5Ja4BHgM/SeD+cZPvX2ziGh2i8F7fSeC/+g+0fTRXXFL+Wxmt4IPAgMBf4XJWDbL99ivhVwK8C3wTOAL5dbefXgBW2v9FGDn39XqzWK3o/zpD3YlffS53+HRsd0u37aE20ALcCh7QoPxS4rc1trJtg+b/AT9qIvwE4snp8KvBd4Neq599uI/5GGh/Iw4EdwBFNx3BDm8dwB3AZcA7w9mr50e7HbcR/u+nxVcCx1eNnAN9pI/6WpsfXAbOank8ZvzuH6pjfCXwV+CFwMfCqNuNvpvGlfCDwAHBgVb4Pjb+Mp4rf1BTzDBq/FAAWAFe1mcNtwD4tyvcF/q3NHFotNwKPtflemls9HgJGafyy3JP34uzqNXwYOLgqPwDY1OZrcH31Xnw1jQbzq4G7qsdT/izHvRevpdGDDfA04MY24jc35zKurt3P07dpNNhfD3yy+ix9qfo8HdTOz7H6f071Pp5dPVc7r+Pun0P1+EDgG9XjY9r5Oc6E92Id78cZ8l7s6nuJGn7HZtn7Sy+fHhXQqhtwrKprxyuBs2k0mMZv+9Q24mf
bvgvA9r9KWgT8k6SjJ8htvJ22HwEekbTV9t3Vth6Q1G4X54nAB2l0YV9o+weS3m/7r9uMb97PHNu3VzncK6mdU4N3SHqN7a8B22h0w/+7pMPb3H+1Oz8A/BXwV9Up4zcCfyRpnu2j24h3U767j2mM9q7LFPBo9fgnwLOqjW6qTn+3Ywx4DvDv48qPrOqm8mzgl2k0OsfndlUb8bNcnYayvU3Sq4HPSnou7X0edrnRu7n7vfhwta1H23wfAAwDq4E/AH7H9g2SHrX9z23Gz5J0KI2fmVz1cNn+iaQpT7MDNzWdvvuOpGHbo5KeD0x5KqliN06JXwFcIWkfGj0+bwE+DDyzjWPYl8Yv9wOBpwP3A/vR+COiHXNonMraj0bvCra/X+XSjn5/L0L5+3EmvBe7/V6q43ds7GW93Gj7Q+B6SVfQ6G2Cxl8Qr6PRiGnHRuCRVh9kSbe2Ef9jSce5up7N9l3VF9QXgF9oI96S9nHj2oRfadr3/rQ5CMT2j4HzJS0E1kr6YruxlZMkPUzjQ7ifpCOr49iXxl+6UzkPuFTSfwMeAm6QdANwCPCeNnP4uS+AqvH6MeBj1Rf9VL4o6V+A/YFPAH8vaSONv6q/2Ub85cCXJH2TRuP3HwAkHTY+t0mcD3xV0r/x8+/H5wHvbiP+n2j0TtwwvkLSN9qI/6Gkk3fH294h6VeBS2ic5p3K45IOrP6IWNi076fT3i96qsbORyX9Q/X/D9mz75Cn0+itFY3Pxu734lza+zmcB/yZpP8C3AtcLekOGj+P89rMYfx7cSdVD7ykA9uI/ySNHvfZNBoM/yDpe8BLaJyunMongGslXUPjj8oPAahxvev9bR7D+fT3exEK348z5L3Y7fdSHb9jYy/r2WvaAKq/hH6Zp14kOf4vxE7t/yQajb5/G1e+D/BG22uniD8GuMvjLiiVdBTwQttf2cN8BKwAXmr77D2JbbGtQ6ocrm5z/RcCz6fxxbgduNZtXsQv6dVu41qdKbbxUhq9JBslHUfjuo3vA59tJw9JZ9DotfyOqwEokmbROM30WJs5zKLRQ9v8frzWe3B93nRJmkejd+LuFnUvt71hivj9Wh2npGfQuARgyuv6WsT+CvByT3NQTNN2DgSevbsXuI31DwaOpXov2v7hHuzr+bZvm16mT27jOQBVr/chwGLg+7b/tc34XwBeSOPU/pSDgSbYRt++F6v1an0/9uN7sYrv6nup279jY8/1dKMNQNKzaXpD7emHoo5tdDu+F3LIMUy63bluGkE3aPG9kEOOIaKcpMNst9vjG93gLl1MN9VCY1TSRhojlK4EvkKjK3kjcEqb23hx0za+sqfbGJfD+PgXdzq+jW20cwyTvQalx9Duz6H0GLoa38b2vz/I8b2QwyAdA40BNBtpnNJaAxzaVPeveyH+RSXxvZBDt1/DXsgBeHn1nXgzjdG7V9IYVX0HjbM5RZ+nLJ1Zevmatk8B77J9TXOhpJcAI8BJbWxjpHAbk+Xwqb0QP9U22jmGyV6DdnIo3X8d2+h2PJImun5PVBcAz+T4Xsghx/CkjwP/jcYv7POAb0l6gxvX3rZzAXpp/EWF8b2QQ7dfw17I4aM0BoTNBb4InGX7W5JOAf43jUZd9JheviPC08b/kgWwvZHGaJu9sY1ux/dCDjmGhv9BYyj8QeOWubT3Oer3+F7IIcfQcJDtL9l+0PaHaQw++FL1R0g717t0O74Xcuh2fC/ksI/tG924rvlHtr8FYPt6GlOvRA/q5Z629WqMlLyUn41sOZrGfGVtzbhdwza6Hd8LOeQYGq4HvmD7uvEVktoZLdbv8b2QQ47hZ+s+3dWs+7a/Luk3gH8EDuuH+F7IodvxPZBD8x8Jvz+ubt929h97X08PRJB0Oq1v1dL2bU5Kt9Ht+F7IIccAkl4A3Gf73hZ1z/YUgxr6Pb4XcsgxPLnebwHfq3qKm8uPAf6r7Xf2cnwv5NDt+F7IQY3bC37FjWlXmsuPA37D9h9PdQz
RBe6BC+tKFuB/d3sb3Y7vhRxyDL2RQ7fjeyGHHENv5JBjyGuQpf6ll69pa1cdF0uWbqPb8b2QQ46hN3Lodnwv5JBj6I0ccgx5DaJmM6HRFhERETHjpdEWERER0Qd6ttEm6W+q/1dPtWqnttHt+F7IIcfQGzl0O74Xcsgx9EYOOYa8BtFF3b6obqIFuAV4DvAdGvMaHda8NK33jk5to9vxvZBDjqE3cuh2fC/kkGPojRxyDHkNsnRv6dkpPyStAv4TMJ/G9AzNrX3bnt/pbXQ7vhdyyDH0Rg7dju+FHHIMvZFDjiGvQXRRt1uNUy3ARd3eRrfjeyGHHENv5NDt+F7IIcfQGznkGPIaZNn7S8/2tEVERETEz8zqdgIRERERMbU02iIiIiL6QBptEREREX0gjbaIiIiIPpBGW0REREQf+P8BJiqgGCseyIwAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "corr = combi.corr()\n", + "f, ax = plt.subplots(figsize=(12, 9))\n", + "sns.heatmap(corr, vmax=.8, square=True);" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ba6957d3", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:33.692201Z", + "iopub.status.busy": "2022-05-11T11:03:33.691895Z", + "iopub.status.idle": "2022-05-11T11:03:33.715345Z", + "shell.execute_reply": "2022-05-11T11:03:33.714306Z" + }, + "papermill": { + "duration": 0.06877, + "end_time": "2022-05-11T11:03:33.718205", + "exception": false, + "start_time": "2022-05-11T11:03:33.649435", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " f_00 f_01 f_02 f_03 f_04 f_05 f_06 \\\n", + "f_00 1.000000 -0.000373 -0.000155 -0.001402 -0.000391 -0.000088 -0.001257 \n", + "f_01 -0.000373 1.000000 0.001066 0.000231 -0.001221 0.000028 -0.000963 \n", + "f_02 -0.000155 0.001066 1.000000 -0.000307 0.000768 -0.000394 0.000661 \n", + "f_03 -0.001402 0.000231 -0.000307 1.000000 0.000483 0.001895 -0.000237 \n", + "f_04 -0.000391 -0.001221 0.000768 0.000483 1.000000 0.000198 -0.000347 \n", + "f_05 -0.000088 0.000028 -0.000394 0.001895 0.000198 1.000000 0.001312 \n", + "f_06 -0.001257 -0.000963 0.000661 -0.000237 -0.000347 0.001312 1.000000 \n", + "f_07 0.000268 -0.001054 0.000925 0.000644 0.000317 0.000282 -0.000048 \n", + "f_08 -0.000945 -0.000522 -0.000284 -0.000512 0.000721 -0.000602 0.000229 \n", + "f_09 0.000175 -0.000363 0.000052 0.000678 0.000846 0.000830 0.000046 \n", + "f_10 0.000347 0.000363 0.000580 -0.000808 -0.002530 -0.000209 -0.000022 \n", + "f_11 0.001084 0.000897 -0.000922 0.000700 0.001119 -0.000094 -0.000209 \n", + "f_12 0.000300 -0.000526 0.001041 0.000740 0.001370 0.001240 0.000957 \n", + "f_13 0.001492 0.000129 -0.001560 -0.001747 -0.000624 0.000479 0.000286 
\n", + "f_14 0.001599 0.000820 0.001278 -0.000388 0.000224 0.000056 -0.000079 \n", + "f_15 -0.000687 -0.000014 0.001568 -0.001294 0.000272 0.000727 0.000612 \n", + "f_16 0.000163 -0.000759 -0.000873 0.000486 -0.000711 -0.000264 -0.000025 \n", + "f_17 -0.000202 -0.000345 0.000392 0.001029 -0.000427 0.000930 0.000511 \n", + "f_18 -0.000226 -0.000116 -0.000205 0.000235 0.001201 0.000152 0.000143 \n", + "f_19 -0.000310 -0.000941 -0.000755 0.000652 -0.000401 0.000635 0.000661 \n", + "f_20 -0.000489 0.001159 0.000051 0.000124 0.000223 0.000278 -0.000120 \n", + "f_21 0.000768 -0.000546 -0.000196 0.000156 0.000082 0.000051 -0.000620 \n", + "f_22 -0.001141 -0.000207 -0.000636 0.000725 0.000627 0.000474 -0.000243 \n", + "f_23 0.000197 -0.000510 0.000123 0.000361 -0.001822 -0.000784 -0.001081 \n", + "f_24 0.001522 -0.000183 -0.000447 0.001403 0.000876 -0.000349 -0.001327 \n", + "f_25 0.000285 0.000097 -0.000667 0.001246 -0.001457 0.000513 0.001279 \n", + "f_26 -0.000172 0.000420 0.000305 -0.000089 -0.000656 0.000433 0.000436 \n", + "f_28 0.188822 0.194247 0.208815 0.328754 0.169644 0.296369 0.165917 \n", + "f_29 -0.000032 -0.000232 -0.000377 0.000161 0.000141 -0.000608 0.000858 \n", + "f_30 -0.000781 0.000817 -0.000630 -0.000484 -0.000664 -0.000723 0.000495 \n", + "\n", + " f_07 f_08 f_09 ... f_20 f_21 f_22 \\\n", + "f_00 0.000268 -0.000945 0.000175 ... -0.000489 0.000768 -0.001141 \n", + "f_01 -0.001054 -0.000522 -0.000363 ... 0.001159 -0.000546 -0.000207 \n", + "f_02 0.000925 -0.000284 0.000052 ... 0.000051 -0.000196 -0.000636 \n", + "f_03 0.000644 -0.000512 0.000678 ... 0.000124 0.000156 0.000725 \n", + "f_04 0.000317 0.000721 0.000846 ... 0.000223 0.000082 0.000627 \n", + "f_05 0.000282 -0.000602 0.000830 ... 0.000278 0.000051 0.000474 \n", + "f_06 -0.000048 0.000229 0.000046 ... -0.000120 -0.000620 -0.000243 \n", + "f_07 1.000000 0.113097 0.006419 ... -0.002478 0.001269 0.000464 \n", + "f_08 0.113097 1.000000 -0.067602 ... 
-0.004145 0.004364 -0.001753 \n", + "f_09 0.006419 -0.067602 1.000000 ... -0.004613 0.005410 -0.001343 \n", + "f_10 -0.089812 -0.085315 -0.050998 ... 0.003826 -0.003314 0.000885 \n", + "f_11 -0.134704 -0.102443 0.007286 ... 0.004513 -0.005434 0.002350 \n", + "f_12 0.093957 0.011274 0.035231 ... -0.000851 -0.000410 -0.001399 \n", + "f_13 0.059271 0.038777 -0.047612 ... 0.002026 -0.001728 0.000846 \n", + "f_14 -0.061668 -0.043743 0.032393 ... 0.001488 -0.002593 -0.001102 \n", + "f_15 0.055368 0.010021 -0.000042 ... -0.002765 0.003701 -0.001018 \n", + "f_16 0.062274 0.049848 -0.019949 ... 0.001838 -0.001320 0.001152 \n", + "f_17 -0.149490 -0.049167 0.005606 ... 0.001073 0.000038 -0.000294 \n", + "f_18 0.026738 0.066641 -0.051665 ... -0.001864 0.000212 0.000678 \n", + "f_19 -0.003340 -0.005489 -0.010161 ... -0.081401 0.027210 -0.070250 \n", + "f_20 -0.002478 -0.004145 -0.004613 ... 1.000000 -0.012057 -0.063454 \n", + "f_21 0.001269 0.004364 0.005410 ... -0.012057 1.000000 -0.155678 \n", + "f_22 0.000464 -0.001753 -0.001343 ... -0.063454 -0.155678 1.000000 \n", + "f_23 0.005402 0.002331 0.008653 ... -0.062111 0.116442 -0.088164 \n", + "f_24 0.002533 0.003782 0.009826 ... 0.087198 0.054088 -0.016601 \n", + "f_25 -0.001417 -0.002369 -0.005436 ... -0.062476 -0.084967 0.149289 \n", + "f_26 -0.003336 -0.004363 -0.008274 ... 0.116582 0.139453 -0.035476 \n", + "f_28 0.000336 -0.000379 -0.000692 ... 0.000399 0.000206 -0.000204 \n", + "f_29 -0.048202 0.065008 -0.103734 ... -0.003260 0.003343 0.001211 \n", + "f_30 0.000243 0.000014 -0.001397 ... 
0.072440 -0.154615 0.315087 \n", + "\n", + " f_23 f_24 f_25 f_26 f_28 f_29 f_30 \n", + "f_00 0.000197 0.001522 0.000285 -0.000172 0.188822 -0.000032 -0.000781 \n", + "f_01 -0.000510 -0.000183 0.000097 0.000420 0.194247 -0.000232 0.000817 \n", + "f_02 0.000123 -0.000447 -0.000667 0.000305 0.208815 -0.000377 -0.000630 \n", + "f_03 0.000361 0.001403 0.001246 -0.000089 0.328754 0.000161 -0.000484 \n", + "f_04 -0.001822 0.000876 -0.001457 -0.000656 0.169644 0.000141 -0.000664 \n", + "f_05 -0.000784 -0.000349 0.000513 0.000433 0.296369 -0.000608 -0.000723 \n", + "f_06 -0.001081 -0.001327 0.001279 0.000436 0.165917 0.000858 0.000495 \n", + "f_07 0.005402 0.002533 -0.001417 -0.003336 0.000336 -0.048202 0.000243 \n", + "f_08 0.002331 0.003782 -0.002369 -0.004363 -0.000379 0.065008 0.000014 \n", + "f_09 0.008653 0.009826 -0.005436 -0.008274 -0.000692 -0.103734 -0.001397 \n", + "f_10 -0.003461 -0.003243 0.001780 0.004973 -0.000685 0.119974 -0.001564 \n", + "f_11 -0.003118 -0.007147 0.002607 0.006271 0.000484 0.079092 -0.003122 \n", + "f_12 0.003382 0.000881 -0.000297 -0.000923 0.000998 -0.111824 -0.001839 \n", + "f_13 -0.002761 -0.002755 0.000574 0.003935 -0.000507 0.006873 -0.000301 \n", + "f_14 -0.003426 -0.001403 -0.000042 0.003581 0.001091 -0.038155 -0.001668 \n", + "f_15 0.005291 0.005004 -0.001737 -0.004642 -0.000461 -0.001241 0.000669 \n", + "f_16 -0.002896 -0.003837 0.002054 0.004904 -0.001156 0.090912 0.000364 \n", + "f_17 -0.001100 -0.001235 -0.000385 0.002392 0.000479 -0.077153 0.000416 \n", + "f_18 0.001505 0.002620 -0.000008 -0.001464 -0.000377 0.158233 0.000114 \n", + "f_19 -0.057858 -0.101625 -0.020306 -0.014043 -0.000726 -0.008606 0.096469 \n", + "f_20 -0.062111 0.087198 -0.062476 0.116582 0.000399 -0.003260 0.072440 \n", + "f_21 0.116442 0.054088 -0.084967 0.139453 0.000206 0.003343 -0.154615 \n", + "f_22 -0.088164 -0.016601 0.149289 -0.035476 -0.000204 0.001211 0.315087 \n", + "f_23 1.000000 0.011719 -0.231012 0.039106 -0.001049 0.009140 -0.171359 \n", + 
"f_24 0.011719 1.000000 -0.086888 0.013721 0.000375 0.009089 -0.083660 \n", + "f_25 -0.231012 -0.086888 1.000000 0.010003 0.000412 -0.004410 0.178792 \n", + "f_26 0.039106 0.013721 0.010003 1.000000 -0.000673 -0.008045 -0.022952 \n", + "f_28 -0.001049 0.000375 0.000412 -0.000673 1.000000 -0.000940 -0.000932 \n", + "f_29 0.009140 0.009089 -0.004410 -0.008045 -0.000940 1.000000 -0.000315 \n", + "f_30 -0.171359 -0.083660 0.178792 -0.022952 -0.000932 -0.000315 1.000000 \n", + "\n", + "[30 rows x 30 columns]\n" + ] + } + ], + "source": [ + "print(corr)" + ] + }, + { + "cell_type": "markdown", + "id": "513955a1", + "metadata": { + "papermill": { + "duration": 0.040925, + "end_time": "2022-05-11T11:03:33.801214", + "exception": false, + "start_time": "2022-05-11T11:03:33.760289", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Remove columns that have a high correlation" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "830ae1b5", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:33.885247Z", + "iopub.status.busy": "2022-05-11T11:03:33.884695Z", + "iopub.status.idle": "2022-05-11T11:03:34.454640Z", + "shell.execute_reply": "2022-05-11T11:03:34.453834Z" + }, + "papermill": { + "duration": 0.614369, + "end_time": "2022-05-11T11:03:34.456908", + "exception": false, + "start_time": "2022-05-11T11:03:33.842539", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
f_00f_01f_02f_03f_04f_05f_06f_07f_08f_09...f_20f_21f_22f_23f_24f_25f_26f_28f_29f_30
0-1.3732460.238887-0.2433760.567405-0.6477150.8393260.113133151...-0.9197173.058541-2.5407390.766952-2.730628-0.2081771.36340267.60915300
11.697021-1.710322-2.230332-0.5456611.113173-1.5521750.447825134...-1.0754342.1790502.278315-0.633658-1.217077-3.782194-0.058316377.09641500
21.6817260.616746-1.0276890.810492-0.6090860.113965-0.708660102...-3.485342-0.784235-1.385775-0.520558-0.0091212.788536-3.703488-195.59970202
3-0.118172-0.587835-0.8046382.0868220.371005-0.128831-0.282575321...-2.100177-2.3438190.572594-1.6532131.686035-2.533098-0.608601210.82620500
41.148481-0.176567-0.664871-1.1013430.4678750.5001170.407515330...0.6050331.133665-3.912929-1.4303662.127649-3.3067844.371371-217.21179801
..................................................................
6999950.6401100.897808-0.5239561.563760-0.092281-0.6108670.535426016...-2.1851902.6040481.1228670.5181101.2438370.5751110.076372204.18653900
699996-0.191771-0.035246-0.1185330.5847502.1269770.568659-0.052663434...-0.2395523.0298571.384682-1.1357402.982713-1.5117602.225218-97.69459102
699997-0.331704-0.328845-1.1855031.022128-0.483099-0.107146-0.968281112...-0.9226264.021273-1.8452661.096011-2.734508-4.885955-2.248739130.62274510
699998-2.031073-1.2383980.964699-1.0459500.9060640.634301-0.707474511...-3.0799961.453864-1.6966061.0189951.973697-0.353068-3.333449-364.62514800
699999-0.085906-0.0021242.2273750.2171453.179153-1.6601880.891989034...-2.128546-3.549082-4.325318-5.0172210.251268-3.236026-0.362070-155.41734201
\n", + "

1600000 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " f_00 f_01 f_02 f_03 f_04 f_05 f_06 \\\n", + "0 -1.373246 0.238887 -0.243376 0.567405 -0.647715 0.839326 0.113133 \n", + "1 1.697021 -1.710322 -2.230332 -0.545661 1.113173 -1.552175 0.447825 \n", + "2 1.681726 0.616746 -1.027689 0.810492 -0.609086 0.113965 -0.708660 \n", + "3 -0.118172 -0.587835 -0.804638 2.086822 0.371005 -0.128831 -0.282575 \n", + "4 1.148481 -0.176567 -0.664871 -1.101343 0.467875 0.500117 0.407515 \n", + "... ... ... ... ... ... ... ... \n", + "699995 0.640110 0.897808 -0.523956 1.563760 -0.092281 -0.610867 0.535426 \n", + "699996 -0.191771 -0.035246 -0.118533 0.584750 2.126977 0.568659 -0.052663 \n", + "699997 -0.331704 -0.328845 -1.185503 1.022128 -0.483099 -0.107146 -0.968281 \n", + "699998 -2.031073 -1.238398 0.964699 -1.045950 0.906064 0.634301 -0.707474 \n", + "699999 -0.085906 -0.002124 2.227375 0.217145 3.179153 -1.660188 0.891989 \n", + "\n", + " f_07 f_08 f_09 ... f_20 f_21 f_22 f_23 \\\n", + "0 1 5 1 ... -0.919717 3.058541 -2.540739 0.766952 \n", + "1 1 3 4 ... -1.075434 2.179050 2.278315 -0.633658 \n", + "2 1 0 2 ... -3.485342 -0.784235 -1.385775 -0.520558 \n", + "3 3 2 1 ... -2.100177 -2.343819 0.572594 -1.653213 \n", + "4 3 3 0 ... 0.605033 1.133665 -3.912929 -1.430366 \n", + "... ... ... ... ... ... ... ... ... \n", + "699995 0 1 6 ... -2.185190 2.604048 1.122867 0.518110 \n", + "699996 4 3 4 ... -0.239552 3.029857 1.384682 -1.135740 \n", + "699997 1 1 2 ... -0.922626 4.021273 -1.845266 1.096011 \n", + "699998 5 1 1 ... -3.079996 1.453864 -1.696606 1.018995 \n", + "699999 0 3 4 ... -2.128546 -3.549082 -4.325318 -5.017221 \n", + "\n", + " f_24 f_25 f_26 f_28 f_29 f_30 \n", + "0 -2.730628 -0.208177 1.363402 67.609153 0 0 \n", + "1 -1.217077 -3.782194 -0.058316 377.096415 0 0 \n", + "2 -0.009121 2.788536 -3.703488 -195.599702 0 2 \n", + "3 1.686035 -2.533098 -0.608601 210.826205 0 0 \n", + "4 2.127649 -3.306784 4.371371 -217.211798 0 1 \n", + "... ... ... ... ... ... ... 
\n", + "699995 1.243837 0.575111 0.076372 204.186539 0 0 \n", + "699996 2.982713 -1.511760 2.225218 -97.694591 0 2 \n", + "699997 -2.734508 -4.885955 -2.248739 130.622745 1 0 \n", + "699998 1.973697 -0.353068 -3.333449 -364.625148 0 0 \n", + "699999 0.251268 -3.236026 -0.362070 -155.417342 0 1 \n", + "\n", + "[1600000 rows x 30 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "columns = np.full((corr.shape[0],), True, dtype=bool)\n", + "for i in range(corr.shape[0]):\n", + " for j in range(i+1, corr.shape[0]):\n", + " if corr.iloc[i,j] >= 0.80:\n", + " if columns[j]:\n", + " columns[j] = False\n", + "selected_columns = combi.columns[columns]\n", + "combi = combi[selected_columns]\n", + "combi" + ] + }, + { + "cell_type": "markdown", + "id": "8a8e289f", + "metadata": { + "papermill": { + "duration": 0.041725, + "end_time": "2022-05-11T11:03:34.540522", + "exception": false, + "start_time": "2022-05-11T11:03:34.498797", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Scale data" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d42bf25d", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:34.627208Z", + "iopub.status.busy": "2022-05-11T11:03:34.626677Z", + "iopub.status.idle": "2022-05-11T11:03:35.523105Z", + "shell.execute_reply": "2022-05-11T11:03:35.522255Z" + }, + "papermill": { + "duration": 0.942607, + "end_time": "2022-05-11T11:03:35.525139", + "exception": false, + "start_time": "2022-05-11T11:03:34.582532", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
f_00f_01f_02f_03f_04f_05f_06f_07f_08f_09...f_20f_21f_22f_23f_24f_25f_26f_28f_29f_30
00.3152380.5300250.4580380.5467490.4228710.5913720.5127520.06250.31250.0625...0.4547610.5895320.4014550.5323430.3759160.5071060.5755930.5275300.00.0
10.6098900.3298680.2511640.4303040.6044530.3548990.5473800.06250.18750.2500...0.4479120.5578560.6091940.4752870.4383860.3688030.5233510.6533730.00.0
20.6084230.5688260.3763790.5721800.4268540.5196480.4277300.06250.00000.1250...0.3419030.4511310.4512430.4798940.4882440.6230690.3894040.4205050.01.0
30.4356870.4451320.3996020.7057050.5279210.4956400.4718120.18750.12500.0625...0.4028350.3949610.5356640.4337540.5582100.4171390.5031300.5857640.00.0
40.5572470.4873640.4141540.3721700.5379100.5578310.5432090.18750.18750.0000...0.5218330.5202060.3423030.4428320.5764370.3872000.6861250.4117170.00.5
..................................................................
6999950.5084590.5976870.4288250.6509840.4801470.4479760.5564430.00000.06250.3750...0.3990950.5731630.5593850.5222060.5399580.5374160.5283000.5830640.00.0
6999960.4286240.5018750.4710360.5485640.7089960.5646080.4955990.25000.18750.2500...0.4846810.5884990.5706710.4548340.6117290.4566610.6072620.4603140.01.0
6999970.4151950.4717270.3599480.5943210.4398460.4977840.4008690.06250.06250.1250...0.4546330.6242050.4314350.5457470.3757560.3260910.4428610.5531521.00.0
6999980.2521070.3783280.5838180.3779650.5830960.5710990.4278520.31250.06250.0625...0.3597340.5317380.4378440.5426100.5700830.5014990.4030010.3517760.00.0
6999990.4387840.5052770.7152830.5101060.8174960.3442190.5933330.00000.18750.2500...0.4015870.3515530.3245250.2967170.4989910.3899380.5121890.4368430.00.5
\n", + "

1600000 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " f_00 f_01 f_02 f_03 f_04 f_05 f_06 \\\n", + "0 0.315238 0.530025 0.458038 0.546749 0.422871 0.591372 0.512752 \n", + "1 0.609890 0.329868 0.251164 0.430304 0.604453 0.354899 0.547380 \n", + "2 0.608423 0.568826 0.376379 0.572180 0.426854 0.519648 0.427730 \n", + "3 0.435687 0.445132 0.399602 0.705705 0.527921 0.495640 0.471812 \n", + "4 0.557247 0.487364 0.414154 0.372170 0.537910 0.557831 0.543209 \n", + "... ... ... ... ... ... ... ... \n", + "699995 0.508459 0.597687 0.428825 0.650984 0.480147 0.447976 0.556443 \n", + "699996 0.428624 0.501875 0.471036 0.548564 0.708996 0.564608 0.495599 \n", + "699997 0.415195 0.471727 0.359948 0.594321 0.439846 0.497784 0.400869 \n", + "699998 0.252107 0.378328 0.583818 0.377965 0.583096 0.571099 0.427852 \n", + "699999 0.438784 0.505277 0.715283 0.510106 0.817496 0.344219 0.593333 \n", + "\n", + " f_07 f_08 f_09 ... f_20 f_21 f_22 f_23 \\\n", + "0 0.0625 0.3125 0.0625 ... 0.454761 0.589532 0.401455 0.532343 \n", + "1 0.0625 0.1875 0.2500 ... 0.447912 0.557856 0.609194 0.475287 \n", + "2 0.0625 0.0000 0.1250 ... 0.341903 0.451131 0.451243 0.479894 \n", + "3 0.1875 0.1250 0.0625 ... 0.402835 0.394961 0.535664 0.433754 \n", + "4 0.1875 0.1875 0.0000 ... 0.521833 0.520206 0.342303 0.442832 \n", + "... ... ... ... ... ... ... ... ... \n", + "699995 0.0000 0.0625 0.3750 ... 0.399095 0.573163 0.559385 0.522206 \n", + "699996 0.2500 0.1875 0.2500 ... 0.484681 0.588499 0.570671 0.454834 \n", + "699997 0.0625 0.0625 0.1250 ... 0.454633 0.624205 0.431435 0.545747 \n", + "699998 0.3125 0.0625 0.0625 ... 0.359734 0.531738 0.437844 0.542610 \n", + "699999 0.0000 0.1875 0.2500 ... 
0.401587 0.351553 0.324525 0.296717 \n", + "\n", + " f_24 f_25 f_26 f_28 f_29 f_30 \n", + "0 0.375916 0.507106 0.575593 0.527530 0.0 0.0 \n", + "1 0.438386 0.368803 0.523351 0.653373 0.0 0.0 \n", + "2 0.488244 0.623069 0.389404 0.420505 0.0 1.0 \n", + "3 0.558210 0.417139 0.503130 0.585764 0.0 0.0 \n", + "4 0.576437 0.387200 0.686125 0.411717 0.0 0.5 \n", + "... ... ... ... ... ... ... \n", + "699995 0.539958 0.537416 0.528300 0.583064 0.0 0.0 \n", + "699996 0.611729 0.456661 0.607262 0.460314 0.0 1.0 \n", + "699997 0.375756 0.326091 0.442861 0.553152 1.0 0.0 \n", + "699998 0.570083 0.501499 0.403001 0.351776 0.0 0.0 \n", + "699999 0.498991 0.389938 0.512189 0.436843 0.0 0.5 \n", + "\n", + "[1600000 rows x 30 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Min-max scale every column to the [0, 1] range.\n", + "# The cells below treat the first len(train) rows of combi as training data and the\n", + "# remaining rows as test data, so the column minima/maxima are taken over both sets.\n", + "# NOTE(review): using test rows for the scaling statistics is a mild form of leakage -\n", + "# confirm this is intended.\n", + "combi = (combi - combi.min()) / (combi.max() - combi.min())\n", + "combi" + ] + }, + { + "cell_type": "markdown", + "id": "3d80cfe3", + "metadata": { + "papermill": { + "duration": 0.042442, + "end_time": "2022-05-11T11:03:35.610293", + "exception": false, + "start_time": "2022-05-11T11:03:35.567851", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Define X and y" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "4d4770db", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:35.697945Z", + "iopub.status.busy": "2022-05-11T11:03:35.697596Z", + "iopub.status.idle": "2022-05-11T11:03:35.702099Z", + "shell.execute_reply": "2022-05-11T11:03:35.701135Z" + }, + "papermill": { + "duration": 0.051241, + "end_time": "2022-05-11T11:03:35.704229", + "exception": false, + "start_time": "2022-05-11T11:03:35.652988", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Split the scaled frame back into its training and test parts by row position.\n", + "# .iloc makes the positional intent explicit (the row labels of combi repeat).\n", + "n_train = len(train)\n", + "y = target\n", + "X = combi.iloc[:n_train]\n", + "X_test = combi.iloc[n_train:]\n", + "assert len(X) == len(y), 'features and target must have the same number of rows'" + ] + }, + { + "cell_type": "markdown", + "id": "5d8bb6c5", + "metadata": { + "papermill": { + "duration": 0.043146, +
"end_time": "2022-05-11T11:03:35.790146", + "exception": false, + "start_time": "2022-05-11T11:03:35.747000", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Split dataset for training and validation" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "3bb874fb", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:35.878461Z", + "iopub.status.busy": "2022-05-11T11:03:35.878177Z", + "iopub.status.idle": "2022-05-11T11:03:36.552140Z", + "shell.execute_reply": "2022-05-11T11:03:36.551242Z" + }, + "papermill": { + "duration": 0.721353, + "end_time": "2022-05-11T11:03:36.554347", + "exception": false, + "start_time": "2022-05-11T11:03:35.832994", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((810000, 30), (90000, 30), (810000,), (90000,), (700000, 30))" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Hold out 10% of the training rows for validation; the fixed seed keeps the split reproducible.\n", + "X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)\n", + "X_train.shape, X_val.shape, y_train.shape, y_val.shape, X_test.shape" + ] + }, + { + "cell_type": "markdown", + "id": "383f9ee5", + "metadata": { + "papermill": { + "duration": 0.04255, + "end_time": "2022-05-11T11:03:36.640114", + "exception": false, + "start_time": "2022-05-11T11:03:36.597564", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Select model - Logistic Regression" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "b1d9af4e", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:36.727674Z", + "iopub.status.busy": "2022-05-11T11:03:36.727395Z", + "iopub.status.idle": "2022-05-11T11:03:45.348105Z", + "shell.execute_reply": "2022-05-11T11:03:45.346914Z" + }, + "papermill": { + "duration": 8.667486, + "end_time": "2022-05-11T11:03:45.350488", + "exception": false, +
"start_time": "2022-05-11T11:03:36.683002", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.614883950617284\n" + ] + } + ], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Baseline: logistic regression with scikit-learn defaults (only the seed is fixed).\n", + "model = LogisticRegression(random_state=42).fit(X_train, y_train)\n", + "# For classifiers, score() is the mean accuracy - here measured on the training split.\n", + "print(model.score(X_train, y_train))" + ] + }, + { + "cell_type": "markdown", + "id": "ea710141", + "metadata": { + "papermill": { + "duration": 0.043577, + "end_time": "2022-05-11T11:03:45.437486", + "exception": false, + "start_time": "2022-05-11T11:03:45.393909", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Predict on validation set" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "110776e9", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:45.528247Z", + "iopub.status.busy": "2022-05-11T11:03:45.527682Z", + "iopub.status.idle": "2022-05-11T11:03:45.563979Z", + "shell.execute_reply": "2022-05-11T11:03:45.563197Z" + }, + "papermill": { + "duration": 0.084883, + "end_time": "2022-05-11T11:03:45.567687", + "exception": false, + "start_time": "2022-05-11T11:03:45.482804", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.6136444444444444\n" + ] + } + ], + "source": [ + "# Hard class predictions on the held-out rows, kept in y_pred for the confusion matrix.\n", + "y_pred = model.predict(X_val)\n", + "# Validation accuracy; compare with the training accuracy above to spot overfitting.\n", + "print(model.score(X_val, y_val))" + ] + }, + { + "cell_type": "markdown", + "id": "4d6493b1", + "metadata": { + "papermill": { + "duration": 0.050645, + "end_time": "2022-05-11T11:03:45.700862", + "exception": false, + "start_time": "2022-05-11T11:03:45.650217", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Confusion matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "3e1a7d9c", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:45.790395Z", + "iopub.status.busy": "2022-05-11T11:03:45.790092Z", +
"iopub.status.idle": "2022-05-11T11:03:45.815750Z", + "shell.execute_reply": "2022-05-11T11:03:45.815037Z" + }, + "papermill": { + "duration": 0.073088, + "end_time": "2022-05-11T11:03:45.817848", + "exception": false, + "start_time": "2022-05-11T11:03:45.744760", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[30030 16191]\n", + " [18581 25198]]\n" + ] + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "\n", + "# Rows are the true classes and columns the predicted classes (scikit-learn convention).\n", + "print(confusion_matrix(y_val, y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "98219d25", + "metadata": { + "papermill": { + "duration": 0.043722, + "end_time": "2022-05-11T11:03:45.906025", + "exception": false, + "start_time": "2022-05-11T11:03:45.862303", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Predict on test set" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "e3af5809", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:45.998329Z", + "iopub.status.busy": "2022-05-11T11:03:45.997865Z", + "iopub.status.idle": "2022-05-11T11:03:46.178067Z", + "shell.execute_reply": "2022-05-11T11:03:46.176967Z" + }, + "papermill": { + "duration": 0.230467, + "end_time": "2022-05-11T11:03:46.181754", + "exception": false, + "start_time": "2022-05-11T11:03:45.951287", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 0, ..., 1, 0, 0])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# predict() returns hard class labels; cast them to int for the submission column.\n", + "# Class labels are never negative, so no clamping of the predictions is needed.\n", + "# NOTE(review): if the competition is scored on ROC AUC, submitting\n", + "# model.predict_proba(X_test)[:, 1] would rank better than hard labels - confirm the metric.\n", + "preds = model.predict(X_test).astype(int)\n", + "preds" + ] + }, + { + "cell_type": "markdown", + "id": "542f7a58", + "metadata": { + "papermill": { + "duration": 0.051137, + "end_time": "2022-05-11T11:03:46.316333", + "exception": false, + "start_time": "2022-05-11T11:03:46.265196", + "status": "completed" + }, + "tags": [] + }, +
"source": [ + "Submit" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "d9c151cc", + "metadata": { + "execution": { + "iopub.execute_input": "2022-05-11T11:03:46.407139Z", + "iopub.status.busy": "2022-05-11T11:03:46.406885Z", + "iopub.status.idle": "2022-05-11T11:03:47.731695Z", + "shell.execute_reply": "2022-05-11T11:03:47.730883Z" + }, + "papermill": { + "duration": 1.372749, + "end_time": "2022-05-11T11:03:47.733818", + "exception": false, + "start_time": "2022-05-11T11:03:46.361069", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtarget
09000000
19000011
29000020
39000030
49000041
.........
69999515999951
69999615999961
69999715999971
69999815999980
69999915999990
\n", + "

700000 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " id target\n", + "0 900000 0\n", + "1 900001 1\n", + "2 900002 0\n", + "3 900003 0\n", + "4 900004 1\n", + "... ... ...\n", + "699995 1599995 1\n", + "699996 1599996 1\n", + "699997 1599997 1\n", + "699998 1599998 0\n", + "699999 1599999 0\n", + "\n", + "[700000 rows x 2 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Item assignment always writes the column; attribute assignment (submission.target = ...)\n", + "# would create a plain attribute instead of a column if 'target' were ever missing.\n", + "submission['target'] = preds\n", + "submission.to_csv('submission.csv', index=False)\n", + "# Read the file back so the displayed frame is what was actually written to disk.\n", + "submission = pd.read_csv(\"submission.csv\")\n", + "submission" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.1" + }, + "papermill": { + "default_parameters": {}, + "duration": 54.233478, + "end_time": "2022-05-11T11:03:48.600102", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2022-05-11T11:02:54.366624", + "version": "2.3.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}