bright1 committed
Commit
1b5b1a6
1 Parent(s): bc0c36c

Added App files

Dockerfile ADDED
@@ -0,0 +1,17 @@
+ # Use an official Python runtime as the base image
+ FROM python:3.9
+
+ # Set the working directory inside the container
+ WORKDIR /code
+
+ # Copy the dependency list first to leverage Docker layer caching
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install the Python dependencies
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Copy the application source tree (the CMD below imports src.app.app)
+ COPY ./src /code/src
+
+ # Launch the API with uvicorn on port 80
+ CMD ["uvicorn", "src.app.app:app", "--host", "0.0.0.0", "--port", "80"]
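Once the image is built and running (for example: docker build -t sepsis-api . followed by docker run -p 80:80 sepsis-api), the container can be smoke-tested from Python. A minimal sketch, not part of the commit, assuming the service is reachable on localhost port 80 and that the requests package is installed on the client:

import requests

# Query the /health endpoint defined in src/app/app.py
response = requests.get("http://localhost:80/health")
print(response.json())  # expected: {"status": "ok"}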
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ tabulate
+ fastapi[all]==0.95.2
+ uvicorn[standard]==0.22.0
+ numpy==1.20.1
+ pandas==1.2.4
+ scikit-learn==0.24.1
+ jinja2==3.1.2
+
src/__init__.py ADDED
File without changes
src/app/app.py ADDED
@@ -0,0 +1,125 @@
+ import os
+ import sys
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+ import uvicorn
+ from fastapi import FastAPI, Request, File, UploadFile
+ from fastapi.responses import HTMLResponse, JSONResponse
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.templating import Jinja2Templates
+ from src.utils import load_pickle, make_prediction, process_label, process_json_csv, output_batch, return_columns
+ from src.module import Inputs
+ import pandas as pd
+ import numpy as np
+ from typing import List
+
+
+ # Create an instance of FastAPI
+ app = FastAPI(debug=True)
+
+ DIRPATH = os.path.dirname(os.path.realpath(__file__))
+
+ # Paths to the serialized model, preprocessing pipeline, and metadata
+ model_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'model-1.pkl')
+ transformer_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'preprocessor.pkl')
+ properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'properties.pkl')
+
+
+ # Load the trained model, pipeline, and other properties
+ model = load_pickle(model_path)
+ transformer = load_pickle(transformer_path)
+ properties = load_pickle(properties_path)
+
+ # Configure static and template files
+ app.mount("/static", StaticFiles(directory="src/app/static"), name="static")  # Mount static files
+ templates = Jinja2Templates(directory="src/app/templates")  # Templates for HTML responses
+
+
+ # Root endpoint to serve the index.html template
+ @app.get("/", response_class=HTMLResponse)
+ async def root(request: Request):
+     return templates.TemplateResponse("index.html", {'request': request})
+
+
+ # Health check endpoint
+ @app.get("/health")
+ def check_health():
+     return {"status": "ok"}
+
+
+ # Model information endpoint
+ @app.post('/model-info')
+ async def model_info():
+     model_name = model.__class__.__name__
+     model_params = model.get_params()
+     features = properties['train features']
+     model_information = {'model info': {
+         'model name': model_name,
+         'model parameters': model_params,
+         'train features': features}
+     }
+     return model_information
+
+
+ # Prediction endpoint for a single observation
+ @app.post('/predict')
+ async def predict(plasma_glucose: float, blood_work_result_1: float,
+                   blood_pressure: float, blood_work_result_2: float,
+                   blood_work_result_3: float, body_mass_index: float,
+                   blood_work_result_4: float, age: int, insurance: bool):
+
+     # Create a single-row dataframe from the inputs
+     data = pd.DataFrame([[plasma_glucose, blood_work_result_1, blood_pressure,
+                           blood_work_result_2, blood_work_result_3, body_mass_index,
+                           blood_work_result_4, age, insurance]], columns=return_columns())
+
+     data_copy = data.copy()  # Keep an untransformed copy for the response
+     label, prob = make_prediction(data, transformer, model)  # Get the predicted label
+     data_copy['Predicted Label'] = label[0]
+     data_copy['Predicted Label'] = data_copy.apply(process_label, axis=1)  # Map 0/1 to readable text
+     inputs = data.to_dict('index')  # Convert dataframe to dictionary
+     outputs = data_copy[['Predicted Label']].to_dict('index')
+     response = {'inputs': inputs,
+                 'outputs': outputs}
+     return response
+
+
+ # Batch prediction endpoint
+ @app.post('/predict-batch')
+ async def predict_batch(inputs: Inputs):
+     # Create a dataframe from the list of inputs
+     data = pd.DataFrame(inputs.return_dict_inputs())
+     data_copy = data.copy()  # Create a copy of the data
+     labels, probs = make_prediction(data, transformer, model)  # Get the labels
+     data_labels = pd.DataFrame(labels, columns=['Predicted Label'])
+     data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)
+
+     response = output_batch(data, data_labels)  # Pair each input row with its output
+
+     return response
+
+
+ # Upload data endpoint for CSV or JSON files
+ @app.post("/upload-data")
+ async def upload_data(file: UploadFile = File(...)):
+     file_type = file.content_type
+
+     valid_formats = ['text/csv', 'application/json']
+
+     if file_type not in valid_formats:
+         return JSONResponse(content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"})
+     else:
+         contents = await file.read()
+         data = process_json_csv(contents=contents, file_type=file_type, valid_formats=valid_formats)
+         data_copy = data.copy()  # Create a copy of the data
+         labels, probs = make_prediction(data, transformer, model)  # Get the labels
+         data_copy['Predicted Label'] = labels  # Create the predicted label column
+         data_copy['Predicted Label'] = data_copy.apply(process_label, axis=1)
+         data_dict = data_copy.to_dict('index')  # Convert the data to a dictionary
+
+         return {'outputs': data_dict}
+
+
+ # Run the FastAPI application
+ if __name__ == '__main__':
+     uvicorn.run('app:app', reload=True)
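For reference, the two prediction endpoints can be exercised from a client as follows. This is a sketch, not part of the commit: the host/port and feature values are assumptions, and the requests package must be installed on the client. Note that /predict takes its scalars as query parameters (they are bare parameters in the route signature), while /predict-batch expects a JSON body matching the Inputs schema.

import requests

BASE_URL = "http://localhost:80"  # assumed host and port

# /predict: scalar parameters are sent as query parameters (illustrative values)
params = {
    "plasma_glucose": 6.0, "blood_work_result_1": 148.0, "blood_pressure": 72.0,
    "blood_work_result_2": 35.0, "blood_work_result_3": 33.6, "body_mass_index": 30.5,
    "blood_work_result_4": 0.63, "age": 54, "insurance": True,
}
print(requests.post(f"{BASE_URL}/predict", params=params).json())

# /predict-batch: the Inputs schema expects a JSON body with an "all" list
payload = {"all": [params, params]}
print(requests.post(f"{BASE_URL}/predict-batch", json=payload).json())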
src/app/static/styles.css ADDED
@@ -0,0 +1,3 @@
+ h1 {
+     color: rgb(81, 146, 43);
+ }
src/app/templates/index.html ADDED
@@ -0,0 +1,13 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta http-equiv="X-UA-Compatible" content="IE=edge">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <link rel="stylesheet" href="{{ url_for('static', path='/styles.css') }}">
+     <title>Sepsis API</title>
+ </head>
+ <body>
+     <h1>Welcome to the Sepsis API</h1>
+ </body>
+ </html>
src/assets/ml_components/model-1.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a27997d87bb2dec63f3dde72105ea2232c39c1c961ba92b2f36095db4078229
+ size 937
src/assets/ml_components/other-components.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b72e0521e300a6b0be14d89772ac467da5eabf078c21e85feb1dcc7a0a4701b
+ size 471
src/assets/ml_components/preprocessor.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9e55b9060711ca80ea27bcff559d20e52e9952a65388b53db41f696a771eba1
+ size 2456
src/assets/ml_components/properties.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0efeb8aa27c6d6bae723817e03f53782a5fc0847440e900539a58977204de0ac
+ size 387
src/module.py ADDED
@@ -0,0 +1,23 @@
+ from pydantic import BaseModel
+ from typing import List
+
+
+ class Input(BaseModel):
+     plasma_glucose: float
+     blood_work_result_1: float
+     blood_pressure: float
+     blood_work_result_2: float
+     blood_work_result_3: float
+     body_mass_index: float
+     blood_work_result_4: float
+     age: int
+     insurance: bool
+
+
+ class Inputs(BaseModel):
+     all: List[Input]
+
+     def return_dict_inputs(self):
+         # Convert each Input in the batch to a plain dictionary
+         return [single_input.dict() for single_input in self.all]
+
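A quick usage sketch of these schemas (the feature values are made up, and the repo root is assumed to be on sys.path): Inputs wraps a list of Input records, and return_dict_inputs converts them into the list of dicts that the batch endpoint feeds to pd.DataFrame.

from src.module import Input, Inputs

# Build a batch with a single (made-up) observation
batch = Inputs(all=[
    Input(plasma_glucose=6.0, blood_work_result_1=148.0, blood_pressure=72.0,
          blood_work_result_2=35.0, blood_work_result_3=33.6, body_mass_index=30.5,
          blood_work_result_4=0.63, age=54, insurance=True),
])

print(batch.return_dict_inputs())  # list of plain dicts, ready for pd.DataFrame(...)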
src/utils.py ADDED
@@ -0,0 +1,112 @@
+ import pandas as pd
+ import numpy as np
+ import pickle
+ from io import StringIO
+ from fastapi.responses import JSONResponse
+
+
+ def load_pickle(filename):
+     # Load a serialized object (model, pipeline, or metadata) from disk
+     with open(filename, 'rb') as file:
+         contents = pickle.load(file)
+     return contents
+
+
+ def feature_engineering(data):
+     # Recreate, in place, the features used during training
+     data['Insurance'] = data['Insurance'].astype(int).astype(str)
+
+     # Interaction feature: product of all numerical features
+     data['All-Product'] = (data['Blood Work Result-4'] * data['Blood Work Result-1']
+                            * data['Blood Work Result-2'] * data['Blood Work Result-3']
+                            * data['Plasma Glucose'] * data['Blood Pressure']
+                            * data['Age'] * data['Body Mass Index'])
+
+     # Bin the interaction feature into fixed ranges
+     all_labels = ['{0}-{1}'.format(i, i + 500000000000) for i in range(0, round(2714705253292.0312), 500000000000)]
+     data['All-Product_range'] = pd.cut(data['All-Product'], bins=range(0, 3500000000000, 500000000000), right=False, labels=all_labels)
+
+     # Categorical feature for age
+     age_labels = ['{0}-{1}'.format(i, i + 20) for i in range(0, 83, 20)]
+     data['Age Group'] = pd.cut(data['Age'], bins=range(0, 120, 20), right=False, labels=age_labels)
+
+     # Categorical feature for body mass index
+     labels = ['{0}-{1}'.format(i, i + 30) for i in range(0, round(67.1), 30)]
+     data['BMI_range'] = pd.cut(data['Body Mass Index'], bins=range(0, 120, 30), right=False, labels=labels)
+
+     # Categorical feature for blood pressure
+     bp_labels = ['{0}-{1}'.format(i, i + 50) for i in range(0, round(122), 50)]
+     data['BP_range'] = pd.cut(data['Blood Pressure'], bins=range(0, 200, 50), right=False, labels=bp_labels)
+
+     # Categorical feature for plasma glucose
+     labels = ['{0}-{1}'.format(i, i + 7) for i in range(0, round(17), 7)]
+     data['PG_range'] = pd.cut(data['Plasma Glucose'], bins=range(0, 28, 7), right=False, labels=labels)
+
+     # Drop the raw columns that the engineered features replace
+     data.drop(columns=['Blood Pressure', 'Age', 'Body Mass Index', 'Plasma Glucose', 'All-Product',
+                        'Blood Work Result-3', 'Blood Work Result-2'], inplace=True)
+
+
+ def combine_cats_nums(transformed_data, full_pipeline):
+     # Rebuild a labeled dataframe from the transformed numpy array
+     cat_features = full_pipeline.named_transformers_['categorical']['cat_encoder'].get_feature_names()
+     num_features = ['Blood Work Result-1', 'Blood Work Result-4']
+     columns_ = np.concatenate([num_features, cat_features])  # Numerical then categorical feature names
+     prepared_data = pd.DataFrame(transformed_data, columns=columns_)
+     prepared_data = prepared_data.rename(columns={'x0_0': 'Insurance_0', 'x0_1': 'Insurance_1'})
+     return prepared_data
+
+
+ def make_prediction(data, transformer, model):
+     # Rename incoming columns to the names the pipeline was trained on
+     new_columns = return_columns()
+     dict_new_old_cols = dict(zip(data.columns, new_columns))
+     data = data.rename(columns=dict_new_old_cols)
+     feature_engineering(data)  # Create new features in place
+     transformed_data = transformer.transform(data)  # Apply the fitted preprocessing pipeline
+     combine_cats_nums(transformed_data, transformer)  # Labeled view of the transformed data (not needed for predict)
+     label = model.predict(transformed_data)  # Predict the class labels
+     probs = model.predict_proba(transformed_data)
+     return label, probs.max()
+
+
+ # Map the numeric prediction to a human-readable sepsis status
+ def process_label(row):
+     if row['Predicted Label'] == 1:
+         return 'Sepsis status is Positive'
+     elif row['Predicted Label'] == 0:
+         return 'Sepsis status is Negative'
+
+
+ def return_columns():
+     # Column names expected by the feature-engineering step
+     new_columns = ['Plasma Glucose', 'Blood Work Result-1', 'Blood Pressure',
+                    'Blood Work Result-2', 'Blood Work Result-3', 'Body Mass Index',
+                    'Blood Work Result-4', 'Age', 'Insurance']
+     return new_columns
+
+
+ def process_json_csv(contents, file_type, valid_formats):
+     # Decode the uploaded bytes and parse them as CSV or JSON
+     contents = contents.decode()
+     new_columns = return_columns()
+     if file_type == valid_formats[0]:
+         data = pd.read_csv(StringIO(contents))
+     elif file_type == valid_formats[1]:
+         data = pd.read_json(contents)
+         data = data.drop(columns=['ID'])
+     dict_new_old_cols = dict(zip(data.columns, new_columns))  # Map uploaded column names to the expected ones
+     data = data.rename(columns=dict_new_old_cols)
+     return data
+
+
+ def output_batch(data1, data2):
+     # Pair each input row with its corresponding output row
+     results_list = []
+     for row1, row2 in zip(data1.itertuples(index=False), data2.itertuples(index=False)):
+         results_list.append({'input': row1._asdict(), 'output': row2._asdict()})
+
+     final_dict = {'results': results_list}
+     return final_dict
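To see the response-shaping helpers in isolation, here is a small self-contained sketch with toy dataframes (no model required; the repo root is assumed to be on sys.path). One caveat worth knowing: itertuples renames "Predicted Label" to a positional field name in the output dicts, because it is not a valid Python identifier.

import pandas as pd
from src.utils import process_label, output_batch

# Toy frames standing in for real inputs and model outputs
inputs = pd.DataFrame({"Age": [54, 31]})
outputs = pd.DataFrame({"Predicted Label": [1, 0]})

# Map the numeric labels to readable text
outputs["Predicted Label"] = outputs.apply(process_label, axis=1)

# Pair each input row with its output row
print(output_batch(inputs, outputs))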