bright1 committed on
Commit b4eec0c
1 Parent(s): bba1f1d

Updated app.py and utils.py

Files changed (2)
  1. src/app/app.py +19 -33
  2. src/utils.py +20 -28
src/app/app.py CHANGED
@@ -17,11 +17,13 @@ from typing import List
 # Create an instance of FastAPI
 app = FastAPI(debug=True)
 
+# get absolute path
 DIRPATH = os.path.dirname(os.path.realpath(__file__))
 
+# set paths for pickle files
 model_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'model-1.pkl')
 transformer_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'preprocessor.pkl')
-properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'properties.pkl')
+properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'other-components.pkl')
 
 
 # Load the trained model, pipeline, and other properties
@@ -46,16 +48,15 @@ def check_health():
 # Model information endpoint
 @app.post('/model-info')
 async def model_info():
-    model_name = model.__class__.__name__
-    model_params = model.get_params()
-    features = properties['train features']
-    print(features)
+    model_name = model.__class__.__name__  # get model name
+    model_params = model.get_params()  # get model parameters
+    features = properties['train features']  # get training features
     model_information = {'model info': {
         'model name ': model_name,
         'model parameters': model_params,
         'train feature': features}
     }
-    return model_information
+    return model_information  # return model information
 
 
 # Prediction endpoint
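For reference, the tidied endpoint can be exercised once the service is up. A minimal sketch, assuming a local uvicorn server on the default port 8000 (the actual name and parameters depend on whatever model is pickled):

    import requests

    # POST matches the @app.post('/model-info') route
    resp = requests.post('http://127.0.0.1:8000/model-info')
    print(resp.json())
    # shape: {'model info': {'model name ': ..., 'model parameters': {...}, 'train feature': [...]}}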
@@ -70,14 +71,9 @@ async def predict(plasma_glucose: float, blood_work_result_1: float,
                           blood_work_result_2, blood_work_result_3, body_mass_index,
                           blood_work_result_4, age, insurance]], columns=return_columns())
 
-    data_copy = data.copy()  # Create a copy of the dataframe
-    label, prob = make_prediction(data, transformer, model)  # Get the labels
-    data_copy['Predicted Label'] = label[0]  # Get the labels from making a prediction
-    data_copy['Predicted Label'] = data_copy.apply(process_label, axis=1)
-    inputs = data.to_dict('index')  # Convert dataframe to dictionary
-    outputs = data_copy[['Predicted Label']].to_dict('index')
-    response = {'inputs': inputs,
-                'outputs': outputs}
+    # data_copy = data.copy()  # Create a copy of the dataframe
+    labels, prob = make_prediction(data, transformer, model)  # Get the labels
+    response = output_batch(data, labels)  # output results
     return response
 
 
@@ -88,11 +84,7 @@ async def predict_batch(inputs: Inputs):
     data = pd.DataFrame(inputs.return_dict_inputs())
     data_copy = data.copy()  # Create a copy of the data
     labels, probs = make_prediction(data, transformer, model)  # Get the labels
-    data_labels = pd.DataFrame(labels, columns=['Predicted Label'])
-    data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)
-
-    response = output_batch(data, data_labels)
-
+    response = output_batch(data, labels)  # output results
     return response
 
 
@@ -100,25 +92,19 @@ async def predict_batch(inputs: Inputs):
 # Upload data endpoint
 @app.post("/upload-data")
 async def upload_data(file: UploadFile = File(...)):
-    file_type = file.content_type
-    print(f'INFO {file_type}')
-
-    valid_formats = ['text/csv', 'application/json']
-
+    file_type = file.content_type  # get the type of the uploaded file
+    valid_formats = ['text/csv', 'application/json']  # list of valid formats the API can receive
     if file_type not in valid_formats:
-        return JSONResponse(content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"})
+        return JSONResponse(content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"})  # return an error for invalid file types
 
     else:
-        contents = await file.read()
-        data = process_json_csv(contents=contents, file_type=file_type, valid_formats=valid_formats)
-        data_copy = data.copy()  # Create a copy of the data
+        contents = await file.read()  # read the file contents
+        data = process_json_csv(contents=contents, file_type=file_type, valid_formats=valid_formats)  # process the file
         labels, probs = make_prediction(data, transformer, model)  # Get the labels
-        data_copy['Predicted Label'] = labels  # Create the predicted label column
-        data_copy['Predicted Label'] = data_copy.apply(process_label, axis=1)
-        data_dict = data_copy.to_dict('index')  # Convert data to a dictionary
-        # print(data_dict.index)
+        response = output_batch(data, labels)  # output results
 
-        return {'outputs': data_dict}
+        return response
+
 
 # Run the FastAPI application
 if __name__ == '__main__':
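All three prediction routes now delegate to output_batch, so they share the {'results': [...]} payload built in src/utils.py. A minimal sketch of driving the upload endpoint, assuming a local server and a CSV whose columns match the expected features (data.csv is an illustrative name):

    import requests

    # content type must be one of ['text/csv', 'application/json']
    with open('data.csv', 'rb') as f:
        resp = requests.post('http://127.0.0.1:8000/upload-data',
                             files={'file': ('data.csv', f, 'text/csv')})
    print(resp.json())  # {'results': [{0: {'inputs': {...}, 'output': {...}}}, ...]}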
 
src/utils.py CHANGED
@@ -2,17 +2,12 @@ import pandas as pd
 import numpy as np
 import pickle
 from io import StringIO
-from fastapi.responses import JSONResponse
-# from cachetools import cached, TTLCache
+from functools import lru_cache
 
-# # Define the cache
-# cache = TTLCache(maxsize=5, ttl=3600,)  # Cache with a maximum size of 5 and a TTL of 1 hour
-
-# # # Load the model
-# @cached(cache)
+@lru_cache(maxsize=100)
 def load_pickle(filename):
-    with open(filename, 'rb') as file:
-        contents = pickle.load(file)
+    with open(filename, 'rb') as file:  # read file
+        contents = pickle.load(file)  # load contents of file
     return contents
 
 
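The commented-out cachetools approach is replaced by functools.lru_cache from the standard library; the cache key is the filename argument, so each pickle is read from disk at most once per process. A self-contained sketch of that behavior (demo.pkl and its contents are made up for illustration):

    import os
    import pickle
    import tempfile
    from functools import lru_cache

    @lru_cache(maxsize=100)
    def load_pickle(filename):
        with open(filename, 'rb') as file:  # read file
            contents = pickle.load(file)  # load contents of file
        return contents

    path = os.path.join(tempfile.gettempdir(), 'demo.pkl')
    with open(path, 'wb') as f:
        pickle.dump({'train features': ['age', 'insurance']}, f)

    first = load_pickle(path)
    second = load_pickle(path)
    assert first is second  # second call is served from the cache, not the file

One thing to keep in mind: lru_cache keys on the exact argument value, so the same path string must be passed each time for a cache hit.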
@@ -52,14 +47,14 @@ def combine_cats_nums(transformed_data, full_pipeline):
 
 def make_prediction(data, transformer, model):
     new_columns = return_columns()
-    dict_new_old_cols = dict(zip(data.columns, new_columns))
+    dict_new_old_cols = dict(zip(data.columns, new_columns))  # create a dict of original columns and new columns
     data = data.rename(columns=dict_new_old_cols)
     feature_engineering(data)  # create new features
     transformed_data = transformer.transform(data)  # transform the data using the transformer
     combine_cats_nums(transformed_data, transformer)  # create a dataframe from the transformed data
     # make prediction
     label = model.predict(transformed_data)  # make a prediction
-    probs = model.predict_proba(transformed_data)
+    probs = model.predict_proba(transformed_data)  # predict class probabilities for the inputs
     return label, probs.max()
 
 
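Worth noting: probs.max() collapses the (n_samples, n_classes) array from predict_proba to a single scalar, the largest probability anywhere in the batch, not one confidence per row. A toy illustration (the scikit-learn classifier and data are made up):

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([0, 0, 1, 1])
    model = LogisticRegression().fit(X, y)

    probs = model.predict_proba(X)  # shape (4, 2)
    print(probs.max())              # a single scalar for the whole batch
    print(probs.max(axis=1))        # per-row confidences, shape (4,)

If per-row confidences are wanted downstream, probs.max(axis=1) would be the drop-in change.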
@@ -70,6 +65,7 @@ def process_label(row):
         return 'Sepsis status is Positive'
     elif row['Predicted Label'] == 0:
         return 'Sepsis status is Negative'
+
 
 def return_columns():
     # create new columns
@@ -84,29 +80,25 @@ def process_json_csv(contents, file_type, valid_formats):
     # Read the file contents as a byte string
     contents = contents.decode()  # Decode the byte string to a regular string
    new_columns = return_columns()  # return new_columns
-    if file_type == valid_formats[0]:
-        data = pd.read_csv(StringIO(contents))
     # Process the uploaded file
+    if file_type == valid_formats[0]:
+        data = pd.read_csv(StringIO(contents))  # read csv file
     elif file_type == valid_formats[1]:
-        data = pd.read_json(contents)
-        data = data.drop(columns=['ID'])
+        data = pd.read_json(contents)  # read json file
+        data = data.drop(columns=['ID'])  # drop the ID column
     dict_new_old_cols = dict(zip(data.columns, new_columns))  # get dict of new and old cols
-    data = data.rename(columns=dict_new_old_cols)
+    data = data.rename(columns=dict_new_old_cols)  # rename columns to the appropriate names
     return data
 
 
-def output_batch(data1, data2):
-    # data_dict = data_copy.to_dict('index')  # Convert the data to a dictionary
-    results_list = []
-    # for index in range(len(data1)):
-    #     row1 = data1.iloc(index).to_dict()
-    #     row2 = data2.iloc(index).to_dict()
-    #     results_list.append({'input': row1, 'output': row2})
-
-    for row1, row2 in zip(data1.itertuples(index=False), data2.itertuples(index=False)):
-        dictionary_from_dataframe1 = row1._asdict()
-        dictionary_from_dataframe2 = row2._asdict()
-        results_list.append({'input': dictionary_from_dataframe1, 'output': dictionary_from_dataframe2})
+def output_batch(data1, labels):
+    data_labels = pd.DataFrame(labels, columns=['Predicted Label'])  # convert labels into a dataframe
+    data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)  # map labels to readable strings
+    results_list = []  # create an empty list
+    x = data1.to_dict('index')  # convert dataframe into a dictionary
+    y = data_labels.to_dict('index')  # convert dataframe into a dictionary
+    for i in range(len(y)):
+        results_list.append({i: {'inputs': x[i], 'output': y[i]}})  # append inputs and labels
 
     final_dict = {'results': results_list}
     return final_dict
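The rewritten output_batch pairs inputs and mapped labels by position. A self-contained sketch of the response shape it produces (the column names here are illustrative; the real ones come from return_columns):

    import pandas as pd

    def process_label(row):
        if row['Predicted Label'] == 1:
            return 'Sepsis status is Positive'
        elif row['Predicted Label'] == 0:
            return 'Sepsis status is Negative'

    def output_batch(data1, labels):
        data_labels = pd.DataFrame(labels, columns=['Predicted Label'])
        data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)
        results_list = []
        x = data1.to_dict('index')
        y = data_labels.to_dict('index')
        for i in range(len(y)):
            results_list.append({i: {'inputs': x[i], 'output': y[i]}})
        return {'results': results_list}

    inputs = pd.DataFrame({'age': [34, 51], 'insurance': [1, 0]})
    print(output_batch(inputs, [1, 0]))
    # {'results': [{0: {'inputs': {'age': 34, 'insurance': 1},
    #                   'output': {'Predicted Label': 'Sepsis status is Positive'}}},
    #              {1: ...}]}

Note that x[i] is a label-based lookup into data1's index while i counts from 0, so this relies on data1 carrying its default RangeIndex; a filtered dataframe would need reset_index(drop=True) first.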