File size: 1,180 Bytes
cbe2b03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pandas as pd
import numpy as np
import pickle
import os 



# DIRPATH = os.path.dirname(os.path.realpath(__file__))

# File name of the pickled pipeline object loaded below. The path is relative,
# so open() resolves it against the current working directory.
pipeline_pkl = "full_pipeline.pkl"

def load_pickle(filename):
    """Return the object deserialized from the pickle file at *filename*.

    The file is opened in binary mode and closed again before returning.

    NOTE: unpickling can execute arbitrary code, so only pass paths to files
    that come from a trusted source.
    """
    with open(filename, mode='rb') as handle:
        return pickle.load(handle)

# Loaded once, at import time: importing this module raises if the pickle file
# cannot be opened. create_processed_dataframe() reads this object's
# named_transformers_ mapping.
preprocessor = load_pickle(pipeline_pkl)


def create_new_columns(train_data):
    """Add the derived churn features to *train_data* and drop ``tenure``.

    The frame is modified in place and also returned.

    Columns added:
        ``Monthly Variations``: ``TotalCharges - tenure * MonthlyCharges``.
        ``tenure_group``: ``tenure`` binned into 3-wide, left-closed
            intervals labelled ``'0-2'``, ``'3-5'``, ..., ``'72-74'``.
            Values outside ``[0, 75)`` fall in no bin and become missing.

    Column removed:
        ``tenure``.
    """
    tenure = train_data['tenure']

    # Gap between the billed total and tenure * current monthly charge.
    expected_total = tenure * train_data['MonthlyCharges']
    train_data['Monthly Variations'] = train_data['TotalCharges'] - expected_total

    # Edges 0, 3, ..., 75 give 25 bins; each label is "<low>-<low + 2>".
    edges = range(0, 78, 3)
    bin_names = ['{0}-{1}'.format(low, low + 2) for low in edges[:-1]]
    train_data['tenure_group'] = pd.cut(
        tenure, bins=edges, right=False, labels=bin_names
    )

    train_data.drop(columns=['tenure'], inplace=True)
    return train_data




def create_processed_dataframe(processed_data, train_data):
    """Wrap the transformed feature matrix in a DataFrame with column labels.

    The labels are the non-object, non-category columns of *train_data*
    followed by the feature names reported by the ``cat_encoder`` step of the
    ``categorical`` transformer in the module-level ``preprocessor``.

    Args:
        processed_data: Transformed feature matrix. May be a sparse matrix
            (anything exposing ``toarray()``) or an already dense array-like.
        train_data: DataFrame whose numeric columns supply the leading labels.
            NOTE(review): this assumes the pipeline emits the numeric columns
            first and in this order -- confirm against the pipeline definition.

    Returns:
        A DataFrame holding the dense values of *processed_data* under the
        labels described above.
    """
    numeric_cols = train_data.select_dtypes(exclude=['object', 'category']).columns
    encoder = preprocessor.named_transformers_['categorical']['cat_encoder']

    # get_feature_names() was removed in scikit-learn 1.2. Keep using it when
    # it exists so the generated labels stay exactly as before, and fall back
    # to its replacement on newer releases instead of raising AttributeError.
    if hasattr(encoder, 'get_feature_names'):
        cat_features = encoder.get_feature_names()
    else:
        cat_features = encoder.get_feature_names_out()

    labels = np.concatenate([numeric_cols, cat_features])

    # A transformer may return a dense array rather than a sparse matrix;
    # only densify when the object actually supports it.
    if hasattr(processed_data, 'toarray'):
        values = processed_data.toarray()
    else:
        values = processed_data
    return pd.DataFrame(values, columns=labels)