Updated app.py and utils.py
- src/app/app.py +19 -33
- src/utils.py +20 -28
src/app/app.py
CHANGED

@@ -17,11 +17,13 @@ from typing import List
 # Create an instance of FastAPI
 app = FastAPI(debug=True)
 
+# get absolute path
 DIRPATH = os.path.dirname(os.path.realpath(__file__))
 
+# set path for pickle files
 model_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'model-1.pkl')
 transformer_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'preprocessor.pkl')
-properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', '
+properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'other-components.pkl')
 
 
 # Load the trained model, pipeline, and other properties

@@ -46,16 +48,15 @@ def check_health():
 # Model information endpoint
 @app.post('/model-info')
 async def model_info():
-    model_name = model.__class__.__name__
-    model_params = model.get_params()
-    features = properties['train features']
-    print(features)
+    model_name = model.__class__.__name__  # get model name
+    model_params = model.get_params()  # get model parameters
+    features = properties['train features']  # get training features
     model_information = {'model info': {
         'model name ': model_name,
         'model parameters': model_params,
         'train feature': features}
     }
-    return model_information
+    return model_information  # return model information
 
 
 # Prediction endpoint

@@ -70,14 +71,9 @@ async def predict(plasma_glucose: float, blood_work_result_1: float,
                           blood_work_result_2, blood_work_result_3, body_mass_index,
                           blood_work_result_4, age, insurance]], columns=return_columns())
 
-    data_copy = data.copy()  # Create a copy of the dataframe
-
-
-    data_copy['Predicted Label'] = data_copy.apply(process_label, axis=1)
-    inputs = data.to_dict('index')  # Convert dataframe to dictionary
-    outputs = data_copy[['Predicted Label']].to_dict('index')
-    response = {'inputs': inputs,
-                'outputs': outputs}
+    # data_copy = data.copy()  # Create a copy of the dataframe
+    labels, prob = make_prediction(data, transformer, model)  # Get the labels
+    response = output_batch(data, labels)  # output results
     return response
 
 
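Review note: the single-input predict endpoint now funnels through the same make_prediction / output_batch pair as the batch endpoint below. A minimal sketch of exercising it, assuming the route is declared as a POST at /predict with plain query parameters; the import path and route name are assumptions, not shown in this hunk:

import the app and call the endpoint:

    from fastapi.testclient import TestClient
    from src.app.app import app  # assumed import path for this Space's app

    client = TestClient(app)
    params = {
        'plasma_glucose': 6.0, 'blood_work_result_1': 148.0,
        'blood_work_result_2': 72.0, 'blood_work_result_3': 35.0,
        'body_mass_index': 33.6, 'blood_work_result_4': 0.627,
        'age': 50.0, 'insurance': 1.0,
    }
    response = client.post('/predict', params=params)  # assumed route name
    print(response.json())  # output_batch wraps results as {'results': [...]}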
@@ -88,11 +84,7 @@ async def predict_batch(inputs: Inputs):
     data = pd.DataFrame(inputs.return_dict_inputs())
     data_copy = data.copy()  # Create a copy of the data
     labels, probs = make_prediction(data, transformer, model)  # Get the labels
-
-    data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)
-
-    response = output_batch(data, data_labels)
-
+    response = output_batch(data, labels)  # output results
     return response
 
 

@@ -100,25 +92,19 @@ async def predict_batch(inputs: Inputs):
 # Upload data endpoint
 @app.post("/upload-data")
 async def upload_data(file: UploadFile = File(...)):
-    file_type = file.content_type
-
-
-    valid_formats = ['text/csv', 'application/json']
-
+    file_type = file.content_type  # get the type of the uploaded file
+    valid_formats = ['text/csv', 'application/json']  # list of valid formats the API can receive
     if file_type not in valid_formats:
-        return JSONResponse(content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"})
+        return JSONResponse(content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"})  # return an error for invalid file types
 
     else:
-        contents = await file.read()
-        data = process_json_csv(contents=contents, file_type=file_type, valid_formats=valid_formats)
-        data_copy = data.copy()  # Create a copy of the data
+        contents = await file.read()  # read the contents of the file
+        data = process_json_csv(contents=contents, file_type=file_type, valid_formats=valid_formats)  # process the file
         labels, probs = make_prediction(data, transformer, model)  # Get the labels
-
-
-
-        # print(data_dict.index)
+        response = output_batch(data, labels)  # output results
+
+        return response
 
-        return {'outputs': data_dict}
 
 # Run the FastAPI application
 if __name__ == '__main__':
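Review note: with the shared output_batch path in place, /upload-data responds with the same {'results': [...]} structure. A sketch of uploading an in-memory CSV, assuming the same app import as above; the column header is illustrative, since process_json_csv renames columns positionally via return_columns():

    import io
    from fastapi.testclient import TestClient
    from src.app.app import app  # assumed import path

    client = TestClient(app)
    csv_body = b"PRG,PL,PR,SK,M11,BD2,Age,Insurance\n6,148,72,35,33.6,0.627,50,1\n"  # illustrative header
    response = client.post(
        '/upload-data',
        files={'file': ('inputs.csv', io.BytesIO(csv_body), 'text/csv')},  # content type drives validation
    )
    print(response.json())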
src/utils.py
CHANGED

@@ -2,17 +2,12 @@ import pandas as pd
 import numpy as np
 import pickle
 from io import StringIO
-from
-# from cachetools import cached, TTLCache
+from functools import lru_cache
 
-
-# cache = TTLCache(maxsize=5, ttl=3600,)  # Cache with a maximum size of 1 and a TTL of 1 hour
-
-# # # Load the model
-# @cached(cache)
+@lru_cache(maxsize=100, )
 def load_pickle(filename):
-    with open(filename, 'rb') as file:
-        contents = pickle.load(file)
+    with open(filename, 'rb') as file:  # read file
+        contents = pickle.load(file)  # load contents of file
     return contents
 
 
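Review note: replacing the commented-out cachetools TTL cache with functools.lru_cache works because load_pickle takes a single hashable string argument, so repeated calls with the same path return the cached object instead of re-reading the file; callers must then treat the returned object as shared and avoid mutating it. A self-contained illustration of the behaviour (throwaway file, not the Space's real assets):

    from functools import lru_cache
    import os
    import pickle
    import tempfile

    @lru_cache(maxsize=100)
    def load_pickle(filename):
        with open(filename, 'rb') as file:
            return pickle.load(file)

    # Write a throwaway pickle, then load it twice: the second call is a
    # cache hit and returns the very same object.
    path = os.path.join(tempfile.mkdtemp(), 'demo.pkl')
    with open(path, 'wb') as f:
        pickle.dump({'train features': ['age', 'insurance']}, f)

    a = load_pickle(path)
    b = load_pickle(path)
    assert a is b
    print(load_pickle.cache_info())  # CacheInfo(hits=1, misses=1, maxsize=100, currsize=1)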
@@ -52,14 +47,14 @@ def combine_cats_nums(transformed_data, full_pipeline):
 
 def make_prediction(data, transformer, model):
     new_columns = return_columns()
-    dict_new_old_cols = dict(zip(data.columns, new_columns))
+    dict_new_old_cols = dict(zip(data.columns, new_columns))  # create a dict of original and new columns
     data = data.rename(columns=dict_new_old_cols)
     feature_engineering(data)  # create new features
     transformed_data = transformer.transform(data)  # transform the data using the transformer
     combine_cats_nums(transformed_data, transformer)  # create a dataframe from the transformed data
     # make prediction
     label = model.predict(transformed_data)  # make a prediction
-    probs = model.predict_proba(transformed_data)
+    probs = model.predict_proba(transformed_data)  # predict sepsis probabilities for the inputs
     return label, probs.max()
 
 
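Review note: predict_proba returns one probability row per input, so probs.max() collapses the whole batch to a single scalar; if a per-input confidence is wanted, probs.max(axis=1) would keep one value per row. Illustrated with a stand-in array:

    import numpy as np

    # Stand-in for model.predict_proba output on a 3-row batch with 2 classes.
    probs = np.array([[0.9, 0.1],
                      [0.2, 0.8],
                      [0.6, 0.4]])

    print(probs.max())        # 0.9: one scalar for the whole batch (current behaviour)
    print(probs.max(axis=1))  # [0.9 0.8 0.6]: one confidence per input row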
@@ -70,6 +65,7 @@ def process_label(row):
         return 'Sepsis status is Positive'
     elif row['Predicted Label'] == 0:
         return 'Sepsis status is Negative'
+
 
 def return_columns():
     # create new columns

@@ -84,29 +80,25 @@ def process_json_csv(contents, file_type, valid_formats):
     # Read the file contents as a byte string
     contents = contents.decode()  # Decode the byte string to a regular string
     new_columns = return_columns()  # return new_columns
-    if file_type == valid_formats[0]:
-        data = pd.read_csv(StringIO(contents))
     # Process the uploaded file
+    if file_type == valid_formats[0]:
+        data = pd.read_csv(StringIO(contents))  # read csv files
     elif file_type == valid_formats[1]:
-        data = pd.read_json(contents)
-        data = data.drop(columns=['ID'])
+        data = pd.read_json(contents)  # read json file
+        data = data.drop(columns=['ID'])  # drop ID column
     dict_new_old_cols = dict(zip(data.columns, new_columns))  # get dict of new and old cols
-    data = data.rename(columns=dict_new_old_cols)
+    data = data.rename(columns=dict_new_old_cols)  # rename columns to the appropriate names
     return data
 
 
-def output_batch(data1,
-
-
-    #
-
-
-
-
-    for row1, row2 in zip(data1.itertuples(index=False), data2.itertuples(index=False)):
-        dictionary_from_dataframe1 = row1._asdict()
-        dictionary_from_dataframe2 = row2._asdict()
-        results_list.append({'input': dictionary_from_dataframe1, 'output': dictionary_from_dataframe2})
+def output_batch(data1, labels):
+    data_labels = pd.DataFrame(labels, columns=['Predicted Label'])  # convert labels into a dataframe
+    data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)  # map labels to readable strings
+    results_list = []  # create an empty list
+    x = data1.to_dict('index')  # convert dataframe into a dictionary
+    y = data_labels.to_dict('index')  # convert dataframe into a dictionary
+    for i in range(len(y)):
+        results_list.append({i: {'inputs': x[i], 'output': y[i]}})  # append inputs and outputs
 
     final_dict = {'results': results_list}
     return final_dict
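
To make the new response shape concrete, here is a minimal, self-contained run of output_batch with the process_label helper above and made-up inputs:

    import pandas as pd

    def process_label(row):
        if row['Predicted Label'] == 1:
            return 'Sepsis status is Positive'
        elif row['Predicted Label'] == 0:
            return 'Sepsis status is Negative'

    def output_batch(data1, labels):
        data_labels = pd.DataFrame(labels, columns=['Predicted Label'])
        data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)
        results_list = []
        x = data1.to_dict('index')
        y = data_labels.to_dict('index')
        for i in range(len(y)):
            results_list.append({i: {'inputs': x[i], 'output': y[i]}})
        return {'results': results_list}

    data = pd.DataFrame({'age': [50, 31], 'insurance': [1, 0]})  # made-up inputs
    print(output_batch(data, [1, 0]))
    # {'results': [
    #     {0: {'inputs': {'age': 50, 'insurance': 1},
    #          'output': {'Predicted Label': 'Sepsis status is Positive'}}},
    #     {1: {'inputs': {'age': 31, 'insurance': 0},
    #          'output': {'Predicted Label': 'Sepsis status is Negative'}}}]}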
|