bright1 committed on
Commit b4eec0c
1 Parent(s): bba1f1d

Updated app.py and utils.py

Files changed (2)
  1. src/app/app.py +19 -33
  2. src/utils.py +20 -28
src/app/app.py CHANGED
@@ -17,11 +17,13 @@ from typing import List
 # Create an instance of FastAPI
 app = FastAPI(debug=True)
 
+# get absolute path
 DIRPATH = os.path.dirname(os.path.realpath(__file__))
 
+# set paths for pickle files
 model_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'model-1.pkl')
 transformer_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'preprocessor.pkl')
-properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'properties.pkl')
+properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'other-components.pkl')
 
 
 # Load the trained model, pipeline, and other properties
@@ -46,16 +48,15 @@ def check_health():
 # Model information endpoint
 @app.post('/model-info')
 async def model_info():
-    model_name = model.__class__.__name__
-    model_params = model.get_params()
-    features = properties['train features']
-    print(features)
+    model_name = model.__class__.__name__  # get model name
+    model_params = model.get_params()  # get model parameters
+    features = properties['train features']  # get training features
     model_information = {'model info': {
         'model name ': model_name,
         'model parameters': model_params,
         'train feature': features}
     }
-    return model_information
+    return model_information  # return model information
 
 
 # Prediction endpoint
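For reference, the tidied endpoint can be exercised once the service is up. A minimal sketch, assuming a local uvicorn server on the default port 8000 (the actual name and parameters depend on whatever model is pickled):

    import requests

    # POST matches the @app.post('/model-info') route
    resp = requests.post('http://127.0.0.1:8000/model-info')
    print(resp.json())
    # shape: {'model info': {'model name ': ..., 'model parameters': {...}, 'train feature': [...]}}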
@@ -70,14 +71,9 @@ async def predict(plasma_glucose: float, blood_work_result_1: float,
                           blood_work_result_2, blood_work_result_3, body_mass_index,
                           blood_work_result_4, age, insurance]], columns=return_columns())
 
-    data_copy = data.copy()  # Create a copy of the dataframe
-    label, prob = make_prediction(data, transformer, model)  # Get the labels
-    data_copy['Predicted Label'] = label[0]  # Get the labels from making a prediction
-    data_copy['Predicted Label'] = data_copy.apply(process_label, axis=1)
-    inputs = data.to_dict('index')  # Convert dataframe to dictionary
-    outputs = data_copy[['Predicted Label']].to_dict('index')
-    response = {'inputs': inputs,
-                'outputs': outputs}
+    # data_copy = data.copy()  # Create a copy of the dataframe
+    labels, prob = make_prediction(data, transformer, model)  # Get the labels
+    response = output_batch(data, labels)  # output results
     return response
 
 
@@ -88,11 +84,7 @@ async def predict_batch(inputs: Inputs):
     data = pd.DataFrame(inputs.return_dict_inputs())
     data_copy = data.copy()  # Create a copy of the data
     labels, probs = make_prediction(data, transformer, model)  # Get the labels
-    data_labels = pd.DataFrame(labels, columns=['Predicted Label'])
-    data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)
-
-    response = output_batch(data, data_labels)
-
+    response = output_batch(data, labels)  # output results
     return response
 
 
@@ -100,25 +92,19 @@ async def predict_batch(inputs: Inputs):
 # Upload data endpoint
 @app.post("/upload-data")
 async def upload_data(file: UploadFile = File(...)):
-    file_type = file.content_type
-    print(f'INFO {file_type}')
-
-    valid_formats = ['text/csv', 'application/json']
-
+    file_type = file.content_type  # get the type of the uploaded file
+    valid_formats = ['text/csv', 'application/json']  # list of valid formats the API can receive
     if file_type not in valid_formats:
-        return JSONResponse(content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"})
+        return JSONResponse(content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"})  # return an error for invalid file types
 
     else:
-        contents = await file.read()
-        data = process_json_csv(contents=contents, file_type=file_type, valid_formats=valid_formats)
-        data_copy = data.copy()  # Create a copy of the data
+        contents = await file.read()  # read the file contents
+        data = process_json_csv(contents=contents, file_type=file_type, valid_formats=valid_formats)  # process the file
         labels, probs = make_prediction(data, transformer, model)  # Get the labels
-        data_copy['Predicted Label'] = labels  # Create the predicted label column
-        data_copy['Predicted Label'] = data_copy.apply(process_label, axis=1)
-        data_dict = data_copy.to_dict('index')  # Convert data to a dictionary
-        # print(data_dict.index)
+        response = output_batch(data, labels)  # output results
 
-        return {'outputs': data_dict}
+        return response
+
 
 # Run the FastAPI application
 if __name__ == '__main__':
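All three prediction routes now delegate to output_batch, so they share the {'results': [...]} payload built in src/utils.py. A minimal sketch of driving the upload endpoint, assuming a local server and a CSV whose columns match the expected features (data.csv is an illustrative name):

    import requests

    # content type must be one of ['text/csv', 'application/json']
    with open('data.csv', 'rb') as f:
        resp = requests.post('http://127.0.0.1:8000/upload-data',
                             files={'file': ('data.csv', f, 'text/csv')})
    print(resp.json())  # {'results': [{0: {'inputs': {...}, 'output': {...}}}, ...]}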
 
src/utils.py CHANGED
@@ -2,17 +2,12 @@ import pandas as pd
 import numpy as np
 import pickle
 from io import StringIO
-from fastapi.responses import JSONResponse
-# from cachetools import cached, TTLCache
+from functools import lru_cache
 
-# # Define the cache
-# cache = TTLCache(maxsize=5, ttl=3600,)  # Cache with a maximum size of 5 and a TTL of 1 hour
-
-# # # Load the model
-# @cached(cache)
+@lru_cache(maxsize=100)
 def load_pickle(filename):
-    with open(filename, 'rb') as file:
-        contents = pickle.load(file)
+    with open(filename, 'rb') as file:  # read file
+        contents = pickle.load(file)  # load contents of file
     return contents
 
 
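The commented-out cachetools approach is replaced by functools.lru_cache from the standard library; the cache key is the filename argument, so each pickle is read from disk at most once per process. A self-contained sketch of that behavior (demo.pkl and its contents are made up for illustration):

    import os
    import pickle
    import tempfile
    from functools import lru_cache

    @lru_cache(maxsize=100)
    def load_pickle(filename):
        with open(filename, 'rb') as file:  # read file
            contents = pickle.load(file)  # load contents of file
        return contents

    path = os.path.join(tempfile.gettempdir(), 'demo.pkl')
    with open(path, 'wb') as f:
        pickle.dump({'train features': ['age', 'insurance']}, f)

    first = load_pickle(path)
    second = load_pickle(path)
    assert first is second  # second call is served from the cache, not the file

One thing to keep in mind: lru_cache keys on the exact argument value, so the same path string must be passed each time for a cache hit.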
@@ -52,14 +47,14 @@ def combine_cats_nums(transformed_data, full_pipeline):
 
 def make_prediction(data, transformer, model):
     new_columns = return_columns()
-    dict_new_old_cols = dict(zip(data.columns, new_columns))
+    dict_new_old_cols = dict(zip(data.columns, new_columns))  # create a dict of original columns and new columns
     data = data.rename(columns=dict_new_old_cols)
     feature_engineering(data)  # create new features
     transformed_data = transformer.transform(data)  # transform the data using the transformer
     combine_cats_nums(transformed_data, transformer)  # create a dataframe from the transformed data
     # make prediction
     label = model.predict(transformed_data)  # make a prediction
-    probs = model.predict_proba(transformed_data)
+    probs = model.predict_proba(transformed_data)  # predict class probabilities for the inputs
     return label, probs.max()
 
 
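Worth noting: probs.max() collapses the (n_samples, n_classes) array from predict_proba to a single scalar, the largest probability anywhere in the batch, not one confidence per row. A toy illustration (the scikit-learn classifier and data are made up):

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([0, 0, 1, 1])
    model = LogisticRegression().fit(X, y)

    probs = model.predict_proba(X)  # shape (4, 2)
    print(probs.max())              # a single scalar for the whole batch
    print(probs.max(axis=1))        # per-row confidences, shape (4,)

If per-row confidences are wanted downstream, probs.max(axis=1) would be the drop-in change.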
@@ -70,6 +65,7 @@ def process_label(row):
         return 'Sepsis status is Positive'
     elif row['Predicted Label'] == 0:
         return 'Sepsis status is Negative'
+
 
 def return_columns():
     # create new columns
@@ -84,29 +80,25 @@ def process_json_csv(contents, file_type, valid_formats):
     # Read the file contents as a byte string
     contents = contents.decode()  # Decode the byte string to a regular string
    new_columns = return_columns()  # return new_columns
-    if file_type == valid_formats[0]:
-        data = pd.read_csv(StringIO(contents))
     # Process the uploaded file
+    if file_type == valid_formats[0]:
+        data = pd.read_csv(StringIO(contents))  # read csv file
     elif file_type == valid_formats[1]:
-        data = pd.read_json(contents)
-        data = data.drop(columns=['ID'])
+        data = pd.read_json(contents)  # read json file
+        data = data.drop(columns=['ID'])  # drop the ID column
     dict_new_old_cols = dict(zip(data.columns, new_columns))  # get dict of new and old cols
-    data = data.rename(columns=dict_new_old_cols)
+    data = data.rename(columns=dict_new_old_cols)  # rename columns to the appropriate names
     return data
 
 
-def output_batch(data1, data2):
-    # data_dict = data_copy.to_dict('index')  # Convert the data to a dictionary
-    results_list = []
-    # for index in range(len(data1)):
-    #     row1 = data1.iloc(index).to_dict()
-    #     row2 = data2.iloc(index).to_dict()
-    #     results_list.append({'input': row1, 'output': row2})
-
-    for row1, row2 in zip(data1.itertuples(index=False), data2.itertuples(index=False)):
-        dictionary_from_dataframe1 = row1._asdict()
-        dictionary_from_dataframe2 = row2._asdict()
-        results_list.append({'input': dictionary_from_dataframe1, 'output': dictionary_from_dataframe2})
+def output_batch(data1, labels):
+    data_labels = pd.DataFrame(labels, columns=['Predicted Label'])  # convert labels into a dataframe
+    data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)  # map labels to readable strings
+    results_list = []  # create an empty list
+    x = data1.to_dict('index')  # convert dataframe into a dictionary
+    y = data_labels.to_dict('index')  # convert dataframe into a dictionary
+    for i in range(len(y)):
+        results_list.append({i: {'inputs': x[i], 'output': y[i]}})  # append inputs and labels
 
     final_dict = {'results': results_list}
     return final_dict
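The rewritten output_batch pairs inputs and mapped labels by position. A self-contained sketch of the response shape it produces (the column names here are illustrative; the real ones come from return_columns):

    import pandas as pd

    def process_label(row):
        if row['Predicted Label'] == 1:
            return 'Sepsis status is Positive'
        elif row['Predicted Label'] == 0:
            return 'Sepsis status is Negative'

    def output_batch(data1, labels):
        data_labels = pd.DataFrame(labels, columns=['Predicted Label'])
        data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)
        results_list = []
        x = data1.to_dict('index')
        y = data_labels.to_dict('index')
        for i in range(len(y)):
            results_list.append({i: {'inputs': x[i], 'output': y[i]}})
        return {'results': results_list}

    inputs = pd.DataFrame({'age': [34, 51], 'insurance': [1, 0]})
    print(output_batch(inputs, [1, 0]))
    # {'results': [{0: {'inputs': {'age': 34, 'insurance': 1},
    #                   'output': {'Predicted Label': 'Sepsis status is Positive'}}},
    #              {1: ...}]}

Note that x[i] is a label-based lookup into data1's index while i counts from 0, so this relies on data1 carrying its default RangeIndex; a filtered dataframe would need reset_index(drop=True) first.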