Spaces:
Sleeping
Sleeping
Added App files
Browse files- Dockerfile +17 -0
- requirements.txt +8 -0
- src/__init__.py +0 -0
- src/app/app.py +125 -0
- src/app/static/styles.css +3 -0
- src/app/templates/index.html +13 -0
- src/assets/ml_components/model-1.pkl +3 -0
- src/assets/ml_components/other-components.pkl +3 -0
- src/assets/ml_components/preprocessor.pkl +3 -0
- src/assets/ml_components/properties.pkl +3 -0
- src/module.py +23 -0
- src/utils.py +112 -0
Dockerfile
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Base image
FROM python:3.9

# All following paths are relative to /code inside the image
WORKDIR /code

# Copy the dependency list on its own so the install layer below is cached
# until requirements.txt changes
COPY ./requirements.txt /code/requirements.txt

# Install the Python dependencies
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the application package. The code lives under ./src (there is no ./app
# at the repository root) and CMD imports it as src.app.app, so the package
# must land at /code/src.
COPY ./src /code/src

# Serve the FastAPI application with uvicorn
CMD ["uvicorn", "src.app.app:app", "--host", "0.0.0.0", "--port", "80"]
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
tabulate
|
2 |
+
fastapi[all]==0.95.2
|
3 |
+
uvicorn[standard]==0.22.0
|
4 |
+
numpy==1.20.1
|
5 |
+
pandas==1.2.4
|
6 |
+
scikit-learn==0.24.1
|
7 |
+
jinja2==3.1.2
|
8 |
+
|
src/__init__.py
ADDED
File without changes
|
src/app/app.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
4 |
+
|
5 |
+
import uvicorn
|
6 |
+
from fastapi import FastAPI, Request, File, UploadFile
|
7 |
+
from fastapi.responses import HTMLResponse, JSONResponse
|
8 |
+
from fastapi.staticfiles import StaticFiles
|
9 |
+
from fastapi.templating import Jinja2Templates
|
10 |
+
from src.utils import load_pickle, make_prediction, process_label, process_json_csv, output_batch, return_columns
|
11 |
+
from src.module import Inputs
|
12 |
+
import pandas as pd
|
13 |
+
import numpy as np
|
14 |
+
from typing import List
|
15 |
+
|
16 |
+
|
17 |
+
# Create an instance of FastAPI.
# NOTE(review): debug=True is meant for development; confirm it is intended
# for the deployed service.
app = FastAPI(debug=True)

# Directory that contains this file. Every resource path below is resolved
# against it so the app does not depend on the current working directory.
DIRPATH = os.path.dirname(os.path.realpath(__file__))

model_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'model-1.pkl')
transformer_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'preprocessor.pkl')
properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'properties.pkl')

# Load the trained model, the preprocessing pipeline, and other properties
# once at import time; the endpoints reuse these module-level objects.
model = load_pickle(model_path)
transformer = load_pickle(transformer_path)
properties = load_pickle(properties_path)

# Configure static and template files. The directories sit next to this file,
# so anchor them to DIRPATH instead of assuming the process was started from
# the repository root.
app.mount("/static", StaticFiles(directory=os.path.join(DIRPATH, "static")), name="static")
templates = Jinja2Templates(directory=os.path.join(DIRPATH, "templates"))
|
35 |
+
|
36 |
+
# Landing page: serves the index.html template
@app.get("/", response_class=HTMLResponse)
async def root(request: Request):
    """Render ``index.html`` with the incoming request in its context."""
    context = {'request': request}
    return templates.TemplateResponse("index.html", context)
|
40 |
+
|
41 |
+
# Liveness probe
@app.get("/health")
def check_health():
    """Return a constant payload showing that the service is responding."""
    return dict(status="ok")
|
45 |
+
|
46 |
+
# Describes the loaded model
@app.post('/model-info')
async def model_info():
    """Report the model's class name, its parameters, and the training features.

    The features are read from ``properties['train features']`` and are also
    printed to stdout.
    """
    model_name = model.__class__.__name__
    model_params = model.get_params()
    features = properties['train features']
    print(features)

    details = {
        'model name ': model_name,
        'model parameters': model_params,
        'train feature': features,
    }
    return {'model info': details}
|
59 |
+
|
60 |
+
|
61 |
+
# Single-record prediction
@app.post('/predict')
async def predict(plasma_glucose: float, blood_work_result_1: float,
                  blood_pressure: float, blood_work_result_2: float,
                  blood_work_result_3: float, body_mass_index: float,
                  blood_work_result_4: float, age: int, insurance: bool):
    """Predict the label for one record given as individual parameters.

    Returns:
        A dict with ``'inputs'`` (the submitted values) and ``'outputs'``
        (the human-readable predicted label), both keyed by row index.
    """
    # One-row frame; the value order matches return_columns().
    row = [plasma_glucose, blood_work_result_1, blood_pressure,
           blood_work_result_2, blood_work_result_3, body_mass_index,
           blood_work_result_4, age, insurance]
    data = pd.DataFrame([row], columns=return_columns())

    labelled = data.copy()  # keeps the raw inputs separate from the label column
    label, _prob = make_prediction(data, transformer, model)
    labelled['Predicted Label'] = label[0]
    # Replace the numeric label with its message.
    labelled['Predicted Label'] = labelled.apply(process_label, axis=1)

    return {
        'inputs': data.to_dict('index'),
        'outputs': labelled[['Predicted Label']].to_dict('index'),
    }
|
82 |
+
|
83 |
+
|
84 |
+
# Batch prediction endpoint
@app.post('/predict-batch')
async def predict_batch(inputs: Inputs):
    """Predict labels for every record in the request body.

    Args:
        inputs: Request body holding the list of records under ``all``.

    Returns:
        ``{'results': [...]}`` as built by ``output_batch``: one entry per
        record, pairing the input with its predicted label.
    """
    # An empty batch would produce a frame without columns and fail inside
    # feature engineering; answer with an empty result list instead.
    if not inputs.all:
        return {'results': []}

    data = pd.DataFrame(inputs.return_dict_inputs())
    labels, _ = make_prediction(data, transformer, model)

    # Turn the numeric labels into their messages.
    data_labels = pd.DataFrame(labels, columns=['Predicted Label'])
    data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)

    return output_batch(data, data_labels)
|
97 |
+
|
98 |
+
|
99 |
+
|
100 |
+
# Upload data endpoint
@app.post("/upload-data")
async def upload_data(file: UploadFile = File(...)):
    """Predict labels for every row of an uploaded CSV or JSON file.

    Args:
        file: Uploaded file; its content type must be ``text/csv`` or
            ``application/json``.

    Returns:
        ``{'outputs': ...}`` mapping each row index to the row's values plus
        its ``'Predicted Label'``, or a 415 JSON error response when the
        content type is not supported.
    """
    file_type = file.content_type
    print(f'INFO {file_type}')

    valid_formats = ['text/csv', 'application/json']

    if file_type not in valid_formats:
        # Report the rejection with an error status instead of the default 200.
        return JSONResponse(
            status_code=415,
            content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"},
        )

    contents = await file.read()
    data = process_json_csv(contents=contents, file_type=file_type, valid_formats=valid_formats)
    data_copy = data.copy()  # keeps the uploaded columns for the response
    labels, _ = make_prediction(data, transformer, model)
    data_copy['Predicted Label'] = labels
    # Replace the numeric labels with their messages.
    data_copy['Predicted Label'] = data_copy.apply(process_label, axis=1)

    return {'outputs': data_copy.to_dict('index')}
|
122 |
+
|
123 |
+
# Start uvicorn with auto-reload when this file is executed directly
if __name__ == '__main__':
    uvicorn.run(app='app:app', reload=True)
|
src/app/static/styles.css
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
h1 {
|
2 |
+
color:rgb(81, 146, 43);
|
3 |
+
}
|
src/app/templates/index.html
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
7 |
+
<link rel="stylesheet" href="{{ url_for('static', path='/styles.css') }}">
|
8 |
+
<title>Document</title>
|
9 |
+
</head>
|
10 |
+
<body>
|
11 |
+
<h1>Welcome to the Sepsis API</h1>
|
12 |
+
</body>
|
13 |
+
</html>
|
src/assets/ml_components/model-1.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a27997d87bb2dec63f3dde72105ea2232c39c1c961ba92b2f36095db4078229
|
3 |
+
size 937
|
src/assets/ml_components/other-components.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b72e0521e300a6b0be14d89772ac467da5eabf078c21e85feb1dcc7a0a4701b
|
3 |
+
size 471
|
src/assets/ml_components/preprocessor.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9e55b9060711ca80ea27bcff559d20e52e9952a65388b53db41f696a771eba1
|
3 |
+
size 2456
|
src/assets/ml_components/properties.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0efeb8aa27c6d6bae723817e03f53782a5fc0847440e900539a58977204de0ac
|
3 |
+
size 387
|
src/module.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
from typing import List
|
3 |
+
|
4 |
+
|
5 |
+
class Input(BaseModel):
    """Schema for a single record of input features.

    NOTE(review): units and valid ranges of the measurements are not
    defined here; confirm them against the training data.
    """

    # Numeric measurements
    plasma_glucose: float
    blood_work_result_1: float
    blood_pressure: float
    blood_work_result_2: float
    blood_work_result_3: float
    body_mass_index: float
    blood_work_result_4: float
    # Integer / boolean fields
    age: int
    insurance: bool
|
15 |
+
|
16 |
+
|
17 |
+
class Inputs(BaseModel):
    """Request body for a batch of ``Input`` records."""

    # The field name is part of the request schema ({"all": [...]}), so it is
    # kept even though it shadows the builtin inside the class body.
    all: List[Input]

    def return_dict_inputs(self):
        """Return every record as a plain dict, in request order."""
        return [record.dict() for record in self.all]
|
src/utils.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import pickle
|
4 |
+
from io import StringIO
|
5 |
+
from fastapi.responses import JSONResponse
|
6 |
+
# from cachetools import cached, TTLCache
|
7 |
+
|
8 |
+
# # Define the cache
|
9 |
+
# cache = TTLCache(maxsize=5, ttl=3600,) # Cache with a maximum size of 5 and a TTL of 1 hour
|
10 |
+
|
11 |
+
# # # Load the model
|
12 |
+
# @cached(cache)
|
13 |
+
def load_pickle(filename):
    """Unpickle and return the object stored in ``filename``.

    Only use this on trusted files: unpickling can execute arbitrary code.
    """
    with open(filename, 'rb') as handle:
        return pickle.load(handle)
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
def feature_engineering(data):
    """Add the derived categorical features to ``data`` in place.

    ``Insurance`` becomes a string, the product of all numeric columns is
    binned into ``All-Product_range``, and age, body mass index, blood
    pressure and plasma glucose are binned into labelled ranges. The raw
    columns that were binned (plus ``Blood Work Result-2``/``-3``) are then
    dropped. Values outside the bin edges become NaN.

    Args:
        data: DataFrame with the columns named by ``return_columns``.

    Returns:
        None; ``data`` is modified in place.
    """
    # Encode the insurance flag as the string '0' or '1'.
    data['Insurance'] = data['Insurance'].astype(int).astype(str)

    # Multiply all numeric features, left to right in this fixed order.
    factor_columns = ['Blood Work Result-4', 'Blood Work Result-1',
                      'Blood Work Result-2', 'Blood Work Result-3',
                      'Plasma Glucose', 'Blood Pressure', 'Age',
                      'Body Mass Index']
    product = data[factor_columns[0]]
    for column in factor_columns[1:]:
        product = product * data[column]
    data['All-Product'] = product

    # (new column, source column, bin width, label range stop, bin edge stop)
    binning_specs = [
        ('All-Product_range', 'All-Product', 500000000000, 2714705253292, 3500000000000),
        ('Age Group', 'Age', 20, 83, 120),
        ('BMI_range', 'Body Mass Index', 30, 67, 120),
        ('BP_range', 'Blood Pressure', 50, 122, 200),
        ('PG_range', 'Plasma Glucose', 7, 17, 28),
    ]
    for target, source, width, label_stop, edge_stop in binning_specs:
        range_labels = [f'{low}-{low + width}' for low in range(0, label_stop, width)]
        # Left-closed intervals: [low, low + width)
        data[target] = pd.cut(data[source], bins=range(0, edge_stop, width),
                              right=False, labels=range_labels)

    # Remove the raw columns that are not used after binning.
    unused = ['Blood Pressure', 'Age', 'Body Mass Index', 'Plasma Glucose',
              'All-Product', 'Blood Work Result-3', 'Blood Work Result-2']
    data.drop(columns=unused, inplace=True)
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
|
45 |
+
def combine_cats_nums(transformed_data, full_pipeline):
    """Wrap the transformer output in a DataFrame with readable column names.

    Args:
        transformed_data: 2-D output of ``full_pipeline.transform``; its
            columns are assumed to be the two numeric features followed by
            the encoded categorical features.
        full_pipeline: Fitted transformer exposing
            ``named_transformers_['categorical']['cat_encoder']``.

    Returns:
        DataFrame of ``transformed_data`` with labelled columns. (The
        original built this frame but never returned it.)
    """
    # Feature names generated by the categorical encoder
    cat_features = full_pipeline.named_transformers_['categorical']['cat_encoder'].get_feature_names()
    num_features = ['Blood Work Result-1', 'Blood Work Result-4']
    columns_ = np.concatenate([num_features, cat_features])  # numeric first, then categorical
    prepared_data = pd.DataFrame(transformed_data, columns=columns_)
    # Give the encoder's positional names for Insurance a readable form.
    prepared_data = prepared_data.rename(columns={'x0_0': 'Insurance_0', 'x0_1': 'Insurance_1'})
    return prepared_data
|
51 |
+
|
52 |
+
|
53 |
+
def make_prediction(data, transformer, model):
    """Run feature engineering, the transformer, and the model on ``data``.

    Args:
        data: DataFrame whose columns are renamed positionally to the names
            from ``return_columns``; the caller's frame is not modified.
        transformer: Fitted preprocessor exposing ``transform``.
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.

    Returns:
        Tuple ``(labels, max_probability)``.
        NOTE(review): ``max_probability`` is a single maximum over the whole
        probability matrix, not one value per row; confirm this is intended
        for batches.
    """
    # Positional rename onto the canonical column names (returns a new frame).
    renamed = data.rename(columns=dict(zip(data.columns, return_columns())))
    feature_engineering(renamed)  # adds/drops columns in place

    features = transformer.transform(renamed)
    combine_cats_nums(features, transformer)  # result is not used here

    predicted = model.predict(features)
    probabilities = model.predict_proba(features)
    return predicted, probabilities.max()
|
64 |
+
|
65 |
+
|
66 |
+
|
67 |
+
# Map a row's numeric 'Predicted Label' to its message
def process_label(row):
    """Return the sepsis status message for ``row['Predicted Label']``.

    1 maps to the positive message and 0 to the negative one; any other
    value yields None.
    """
    label = row['Predicted Label']
    if label == 1:
        return 'Sepsis status is Positive'
    if label == 0:
        return 'Sepsis status is Negative'
    return None
|
73 |
+
|
74 |
+
def return_columns():
    """Return a new list with the nine column names used across this module."""
    return [
        'Plasma Glucose',
        'Blood Work Result-1',
        'Blood Pressure',
        'Blood Work Result-2',
        'Blood Work Result-3',
        'Body Mass Index',
        'Blood Work Result-4',
        'Age',
        'Insurance',
    ]
|
80 |
+
|
81 |
+
|
82 |
+
def process_json_csv(contents, file_type, valid_formats):
    """Parse uploaded CSV or JSON bytes into a DataFrame with canonical columns.

    Args:
        contents: Raw file contents as bytes.
        file_type: Content type of the upload.
        valid_formats: Sequence whose first item is the CSV content type and
            whose second item is the JSON content type.

    Returns:
        DataFrame without the ``ID`` column (if present), with its remaining
        columns renamed positionally to the names from ``return_columns``.

    Raises:
        ValueError: If ``file_type`` matches neither supported format.
    """
    # Decode once, then hand pandas a file-like object; passing a literal
    # JSON string to read_json is deprecated in recent pandas versions.
    buffer = StringIO(contents.decode())

    if file_type == valid_formats[0]:
        data = pd.read_csv(buffer)
    elif file_type == valid_formats[1]:
        data = pd.read_json(buffer)
    else:
        # Previously this fell through and failed on an unbound local.
        raise ValueError(f'Unsupported file type: {file_type!r}')

    # The identifier column is not a model feature; tolerate files without it.
    data = data.drop(columns=['ID'], errors='ignore')

    # Positional rename: the i-th remaining column gets the i-th canonical name.
    data = data.rename(columns=dict(zip(data.columns, return_columns())))
    return data
|
96 |
+
|
97 |
+
|
98 |
+
def output_batch(data1, data2):
    """Pair the rows of two DataFrames into a list of input/output dicts.

    Args:
        data1: DataFrame of inputs.
        data2: DataFrame of outputs, row-aligned with ``data1``.

    Returns:
        ``{'results': [{'input': {...}, 'output': {...}}, ...]}`` with one
        entry per row pair, keyed by the original column names. Pairing
        stops at the shorter frame.
    """
    # to_dict('records') keeps column names verbatim. The previous
    # itertuples()/_asdict() approach renamed columns that are not valid
    # identifiers, so 'Predicted Label' came out as '_0'.
    results_list = [
        {'input': input_row, 'output': output_row}
        for input_row, output_row in zip(data1.to_dict('records'),
                                         data2.to_dict('records'))
    ]
    return {'results': results_list}
|