File size: 5,309 Bytes
1b5b1a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import pandas as pd
import numpy as np
import pickle
from io import StringIO
from fastapi.responses import JSONResponse
# from cachetools import cached, TTLCache

# # Define the cache
# cache = TTLCache(maxsize=5, ttl=3600,)  # Cache with a maximum size of 1 and a TTL of 1 hour

# # # Load the model
# @cached(cache)
def load_pickle(filename):
    """Load and return the object pickled in the file at ``filename``.

    The file is opened in binary mode and closed again before returning.

    NOTE: unpickling can execute arbitrary code, so only point this at
    files that come from a trusted source.
    """
    with open(filename, 'rb') as handle:
        return pickle.load(handle)



def feature_engineering(data):
    """Add binned categorical features to ``data`` and drop the raw columns.

    The frame is modified in place and nothing is returned.

    Steps:
      * ``Insurance`` is cast to int and then to str.
      * ``All-Product`` (product of the eight numeric inputs) is computed,
        binned into ``All-Product_range`` and then dropped.
      * ``Age``, ``Body Mass Index``, ``Blood Pressure`` and
        ``Plasma Glucose`` are binned into ``Age Group``, ``BMI_range``,
        ``BP_range`` and ``PG_range``.
      * The raw columns used for binning, plus ``Blood Work Result-2`` and
        ``Blood Work Result-3``, are removed.

    All bins are left-closed (``right=False``) and labelled
    ``"<start>-<start + width>"``.
    """
    def interval_labels(stop, width):
        # One label per bin start in range(0, stop, width).
        return [f'{start}-{start + width}' for start in range(0, stop, width)]

    def bucketize(column, edge_stop, width, label_stop):
        # Bin edges are range(0, edge_stop, width); the number of labels
        # produced from label_stop must equal the number of bins.
        return pd.cut(
            data[column],
            bins=range(0, edge_stop, width),
            right=False,
            labels=interval_labels(label_stop, width),
        )

    data['Insurance'] = data['Insurance'].astype(int).astype(str)

    # Multiply every numerical feature together, keeping a fixed
    # left-to-right order of factors.
    factors = (
        'Blood Work Result-1', 'Blood Work Result-2', 'Blood Work Result-3',
        'Plasma Glucose', 'Blood Pressure', 'Age', 'Body Mass Index',
    )
    running_product = data['Blood Work Result-4']
    for factor in factors:
        running_product = running_product * data[factor]
    data['All-Product'] = running_product

    half_trillion = 500000000000
    data['All-Product_range'] = bucketize(
        'All-Product', 3500000000000, half_trillion, round(2714705253292.0312)
    )
    data['Age Group'] = bucketize('Age', 120, 20, 83)
    data['BMI_range'] = bucketize('Body Mass Index', 120, 30, round(67.1))
    data['BP_range'] = bucketize('Blood Pressure', 200, 50, round(122))
    data['PG_range'] = bucketize('Plasma Glucose', 28, 7, round(17))

    # The raw inputs are no longer needed once their binned versions exist.
    unused = [
        'Blood Pressure', 'Age', 'Body Mass Index', 'Plasma Glucose',
        'All-Product', 'Blood Work Result-3', 'Blood Work Result-2',
    ]
    data.drop(columns=unused, inplace=True)

    


def combine_cats_nums(transformed_data, full_pipeline):
    """Wrap the transformer output in a DataFrame with readable column names.

    Args:
        transformed_data: 2-D array-like produced by the fitted transformer.
            Its columns are labelled with the two numerical feature names
            followed by the encoder's feature names, so it is assumed to be
            laid out in that order -- TODO confirm against the pipeline.
        full_pipeline: Fitted transformer exposing
            ``named_transformers_['categorical']['cat_encoder']``.

    Returns:
        pandas.DataFrame holding ``transformed_data`` with named columns;
        ``x0_0`` / ``x0_1`` are renamed to ``Insurance_0`` / ``Insurance_1``.
    """
    encoder = full_pipeline.named_transformers_['categorical']['cat_encoder']
    # get_feature_names() was removed from scikit-learn encoders in 1.2;
    # keep using it where it exists and fall back to its replacement.
    if hasattr(encoder, 'get_feature_names'):
        cat_features = encoder.get_feature_names()
    else:
        cat_features = encoder.get_feature_names_out()

    num_features = ['Blood Work Result-1', 'Blood Work Result-4']
    columns_ = np.concatenate([num_features, cat_features])
    prepared_data = pd.DataFrame(transformed_data, columns=columns_)
    prepared_data = prepared_data.rename(
        columns={'x0_0': 'Insurance_0', 'x0_1': 'Insurance_1'}
    )
    # The original built this frame and then discarded it.
    return prepared_data
    

def make_prediction(data, transformer, model):
    """Run feature engineering, transformation and prediction on ``data``.

    Args:
        data: DataFrame whose columns are renamed positionally to the names
            given by ``return_columns()`` before any processing.
        transformer: Object with a ``transform`` method applied to the
            engineered frame.
        model: Object with ``predict`` and ``predict_proba`` methods.

    Returns:
        Tuple ``(label, probability)`` where ``label`` is the output of
        ``model.predict`` and ``probability`` is the single largest value in
        the whole ``predict_proba`` output.
    """
    column_mapping = dict(zip(data.columns, return_columns()))
    renamed = data.rename(columns=column_mapping)

    # feature_engineering works in place on the renamed copy.
    feature_engineering(renamed)
    features = transformer.transform(renamed)

    # The return value of this call is not used here.
    combine_cats_nums(features, transformer)

    predicted = model.predict(features)
    probabilities = model.predict_proba(features)
    return predicted, probabilities.max()



# map a row's 'Predicted Label' value to a human-readable sepsis status
def process_label(row):
    """Return the sepsis status message for ``row['Predicted Label']``.

    A label equal to 1 gives the positive message and a label equal to 0
    the negative one; any other value gives ``None``.
    """
    outcomes = (
        (1, 'Sepsis status is Positive'),
        (0, 'Sepsis status is Negative'),
    )
    for expected, message in outcomes:
        if row['Predicted Label'] == expected:
            return message
    return None

def return_columns():
    """Return the feature column names, in positional order.

    A new list is built on every call, so callers may modify the result.
    """
    return [
        'Plasma Glucose',
        'Blood Work Result-1',
        'Blood Pressure',
        'Blood Work Result-2',
        'Blood Work Result-3',
        'Body Mass Index',
        'Blood Work Result-4',
        'Age',
        'Insurance',
    ]


def process_json_csv(contents, file_type, valid_formats):
    """Parse an uploaded CSV or JSON payload into a renamed DataFrame.

    Args:
        contents: Raw file contents as bytes.
        file_type: Format identifier of the upload, compared against
            ``valid_formats``.
        valid_formats: Sequence whose first element identifies CSV and whose
            second element identifies JSON.

    Returns:
        DataFrame with the ``ID`` column dropped and the remaining columns
        renamed positionally to the names from ``return_columns()``.

    Raises:
        ValueError: If ``file_type`` matches neither supported format.
        KeyError: If the parsed data has no ``ID`` column.
    """
    text = contents.decode()  # bytes -> str

    if file_type == valid_formats[0]:
        data = pd.read_csv(StringIO(text))
    elif file_type == valid_formats[1]:
        # Passing a literal JSON string to read_json is deprecated in
        # recent pandas; a file-like wrapper works on all versions.
        data = pd.read_json(StringIO(text))
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError(f'Unsupported file type: {file_type!r}')

    data = data.drop(columns=['ID'])
    # Columns are matched by position, not by name.
    dict_new_old_cols = dict(zip(data.columns, return_columns()))
    return data.rename(columns=dict_new_old_cols)

        
def output_batch(data1, data2):
    """Pair up the rows of two DataFrames into a JSON-friendly structure.

    Args:
        data1: DataFrame of inputs.
        data2: DataFrame of outputs, paired with ``data1`` row by row.

    Returns:
        ``{'results': [{'input': {...}, 'output': {...}}, ...]}`` with one
        entry per row pair, each row given as a ``{column: value}`` dict.
        If the frames differ in length, the extra rows of the longer one
        are ignored.
    """
    # to_dict(orient='records') keeps the real column names. The previous
    # itertuples()/_asdict() approach replaced any name that is not a valid
    # Python identifier (e.g. one containing a space) with a positional
    # key such as '_0'.
    input_rows = data1.to_dict(orient='records')
    output_rows = data2.to_dict(orient='records')
    results_list = [
        {'input': input_row, 'output': output_row}
        for input_row, output_row in zip(input_rows, output_rows)
    ]
    return {'results': results_list}