Spaces:

bright1
/

Sepsis-Prediction-API

Sleeping

App Files Files Community

bright1 committed on Jun 10, 2023

Commit

1b5b1a6

•

1 Parent(s): bc0c36c

Added App files

Browse files

Files changed (12) hide show

Dockerfile +17 -0
requirements.txt +8 -0
src/__init__.py +0 -0
src/app/app.py +125 -0
src/app/static/styles.css +3 -0
src/app/templates/index.html +13 -0
src/assets/ml_components/model-1.pkl +3 -0
src/assets/ml_components/other-components.pkl +3 -0
src/assets/ml_components/preprocessor.pkl +3 -0
src/assets/ml_components/properties.pkl +3 -0
src/module.py +23 -0
src/utils.py +112 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

+# Base image: official CPython 3.9
+FROM python:3.9
+# Every following path and the final command run relative to /code
+WORKDIR /code
+# Copy the dependency list before the sources so the install step forms its own layer
+COPY ./requirements.txt /code/requirements.txt
+# Install the dependencies without keeping pip's download cache in the image
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Copy the application package. The sources live under src/ (there is no top-level app/),
+# and the command below imports src.app.app, so the whole src tree must be present.
+COPY ./src /code/src
+# Serve the FastAPI application with uvicorn on all interfaces, port 80
+CMD ["uvicorn", "src.app.app:app", "--host", "0.0.0.0", "--port", "80"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+tabulate
+fastapi[all]==0.95.2
+uvicorn[standard]==0.22.0
+numpy==1.20.1
+pandas==1.2.4
+scikit-learn==0.24.1
+jinja2==3.1.2

src/__init__.py ADDED Viewed

File without changes

src/app/app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+import uvicorn
+from fastapi import FastAPI, Request, File, UploadFile
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from src.utils import load_pickle, make_prediction, process_label, process_json_csv, output_batch, return_columns
+from src.module import Inputs
+import pandas as pd
+import numpy as np
+from typing import List
+# Create an instance of FastAPI
+# NOTE(review): debug=True is a development setting — confirm it should stay enabled when deployed.
+app = FastAPI(debug=True)
+# Directory containing this file; every resource below is resolved relative to it
+# so start-up does not depend on the current working directory.
+DIRPATH = os.path.dirname(os.path.realpath(__file__))
+model_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'model-1.pkl')
+transformer_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'preprocessor.pkl')
+properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'properties.pkl')
+# Load the trained model, pipeline, and other properties once, at import time
+model = load_pickle(model_path)
+transformer = load_pickle(transformer_path)
+properties = load_pickle(properties_path)
+# Configure static and template files. Both folders sit next to this file, so they are
+# anchored to DIRPATH like the model paths instead of a cwd-relative "src/app/..." path.
+app.mount("/static", StaticFiles(directory=os.path.join(DIRPATH, "static")), name="static") # Mount static files
+templates = Jinja2Templates(directory=os.path.join(DIRPATH, "templates")) # Mount templates for HTML
+# Root endpoint: renders the index.html template as the landing page
+@app.get("/", response_class=HTMLResponse)
+async def root(request: Request):
+    # The template context carries the incoming request object
+    context = {'request': request}
+    return templates.TemplateResponse("index.html", context)
+# Health check endpoint: always answers with a fixed "ok" status payload
+@app.get("/health")
+def check_health():
+    payload = dict(status="ok")
+    return payload
+# Model information endpoint: reports the loaded estimator's class name, its
+# get_params() output and the training features stored in the properties file
+@app.post('/model-info')
+async def model_info():
+    name = model.__class__.__name__
+    params = model.get_params()
+    train_features = properties['train features']
+    print(train_features)
+    return {
+        'model info': {
+            'model name ': name,
+            'model parameters': params,
+            'train feature': train_features,
+        }
+    }
+# Prediction endpoint: scores a single record supplied as individual parameters and
+# returns the inputs together with the human-readable predicted label
+@app.post('/predict')
+async def predict(plasma_glucose: float, blood_work_result_1: float,
+                  blood_pressure: float, blood_work_result_2: float,
+                  blood_work_result_3: float, body_mass_index: float,
+                  blood_work_result_4: float, age: int, insurance: bool):
+    # Single-row dataframe; values are listed in the same order as return_columns()
+    record = [plasma_glucose, blood_work_result_1, blood_pressure,
+              blood_work_result_2, blood_work_result_3, body_mass_index,
+              blood_work_result_4, age, insurance]
+    data = pd.DataFrame([record], columns=return_columns())
+    labelled = data.copy() # Separate frame that receives the label column
+    label, _ = make_prediction(data, transformer, model)
+    labelled['Predicted Label'] = label[0] # Raw label of the only row
+    labelled['Predicted Label'] = labelled.apply(process_label, axis=1) # Raw label -> status text
+    return {
+        'inputs': data.to_dict('index'),
+        'outputs': labelled[['Predicted Label']].to_dict('index'),
+    }
+# Batch prediction endpoint: accepts a body matching `Inputs` and returns the
+# structure built by output_batch (each input record paired with its predicted label)
+@app.post('/predict-batch')
+async def predict_batch(inputs: Inputs):
+    # One dataframe row per submitted record
+    data = pd.DataFrame(inputs.return_dict_inputs())
+    # The probability value is not part of the response, so it is discarded
+    labels, _ = make_prediction(data, transformer, model)
+    data_labels = pd.DataFrame(labels, columns=['Predicted Label'])
+    # Replace the raw labels with their human-readable status text
+    data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)
+    return output_batch(data, data_labels)
+# Upload data endpoint: predicts a label for every record of an uploaded CSV or JSON file.
+# The declared content type selects the parser; unsupported types are rejected with
+# HTTP 415 and an "error" message. On success the response is {'outputs': ...}, mapping
+# each row index to the row's values plus its human-readable 'Predicted Label'.
+@app.post("/upload-data")
+async def upload_data(file: UploadFile = File(...)):
+    file_type = file.content_type
+    print(f'INFO    {file_type}')
+    valid_formats = ['text/csv', 'application/json']
+    if file_type not in valid_formats:
+        # Report the failure through the status code as well as the body
+        return JSONResponse(
+            status_code=415,
+            content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"},
+        )
+    contents = await file.read()
+    data = process_json_csv(contents=contents, file_type=file_type, valid_formats=valid_formats)
+    data_copy = data.copy() # Separate frame that receives the label column
+    labels, _ = make_prediction(data, transformer, model) # Probability value is not returned to the client
+    data_copy['Predicted Label'] = labels # Create the predicted label column
+    data_copy['Predicted Label'] = data_copy.apply(process_label, axis=1) # Raw label -> status text
+    return {'outputs': data_copy.to_dict('index')}
+# Run the FastAPI application with uvicorn (auto-reload on) when this file is executed directly
+# NOTE(review): the 'app:app' import string presumably requires launching from this file's directory — confirm
+if __name__ == '__main__':
+    uvicorn.run('app:app', reload=True)

src/app/static/styles.css ADDED Viewed

	@@ -0,0 +1,3 @@

+/* Green text colour for top-level headings */
+h1 {
+    color:rgb(81, 146, 43);
+}

src/app/templates/index.html ADDED Viewed

	@@ -0,0 +1,13 @@

+<!DOCTYPE html>
+<!-- Landing page template; the stylesheet URL is produced by the template's url_for('static', ...) call -->
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <link rel="stylesheet" href="{{ url_for('static', path='/styles.css') }}">
+    <title>Document</title>
+</head>
+<body>
+  <h1>Welcome to the Sepsis API</h1>
+</body>
+</html>

src/assets/ml_components/model-1.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a27997d87bb2dec63f3dde72105ea2232c39c1c961ba92b2f36095db4078229
+size 937

src/assets/ml_components/other-components.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b72e0521e300a6b0be14d89772ac467da5eabf078c21e85feb1dcc7a0a4701b
+size 471

src/assets/ml_components/preprocessor.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9e55b9060711ca80ea27bcff559d20e52e9952a65388b53db41f696a771eba1
+size 2456

src/assets/ml_components/properties.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0efeb8aa27c6d6bae723817e03f53782a5fc0847440e900539a58977204de0ac
+size 387

src/module.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from pydantic import BaseModel
+from typing import List
+# Schema for one patient record submitted for prediction.
+# NOTE: make_prediction in src/utils.py renames dataframe columns by position, so the
+# field order here has to stay aligned with the list returned by return_columns().
+class Input(BaseModel):
+    plasma_glucose: float
+    blood_work_result_1: float
+    blood_pressure: float
+    blood_work_result_2: float
+    blood_work_result_3: float
+    body_mass_index: float
+    blood_work_result_4: float
+    age: int
+    insurance: bool
+# Request body for batch prediction: a list of records under the key "all"
+class Inputs(BaseModel):
+    all: List[Input]
+    def return_dict_inputs(self):
+        """Return the submitted records as a list of plain dicts, one per ``Input``."""
+        # Instance method, so the receiver is named `self`; the loop variable
+        # no longer shadows the builtin `input`.
+        return [record.dict() for record in self.all]

src/utils.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import pandas as pd
+import numpy as np
+import pickle
+from io import StringIO
+from fastapi.responses import JSONResponse
+# from cachetools import cached, TTLCache
+# # Define the cache
+# cache = TTLCache(maxsize=5, ttl=3600,)  # Cache with a maximum size of 1 and a TTL of 1 hour
+# # # Load the model
+# @cached(cache)
+def load_pickle(filename):
+    """Open *filename* in binary mode and return the object unpickled from it."""
+    with open(filename, 'rb') as handle:
+        return pickle.load(handle)
+def feature_engineering(data):
+    """Add the binned feature columns to *data* in place and drop the raw ones.
+    ``Insurance`` is converted to an integer and then to a string, the eight numeric
+    inputs are multiplied into a temporary ``All-Product`` column, and that product
+    plus age, body mass index, blood pressure and plasma glucose are cut into
+    left-closed ranges labelled ``'<start>-<end>'``. The binned source columns,
+    the temporary product and blood work results 2 and 3 are then removed.
+    Returns ``None``; the caller's frame is modified directly.
+    """
+    def span_labels(upper, width):
+        # One 'start-end' label per bucket whose start lies below `upper`
+        return [f'{start}-{start + width}' for start in range(0, upper, width)]
+    def bucket(source, edges, names):
+        # right=False makes every interval closed on the left, open on the right
+        return pd.cut(data[source], bins=edges, right=False, labels=names)
+    data['Insurance'] = data['Insurance'].astype(int).astype(str)
+    # Multiply all numerical features together, in this fixed order
+    factors = ['Blood Work Result-4', 'Blood Work Result-1', 'Blood Work Result-2',
+               'Blood Work Result-3', 'Plasma Glucose', 'Blood Pressure', 'Age',
+               'Body Mass Index']
+    product = data[factors[0]]
+    for column in factors[1:]:
+        product = product * data[column]
+    data['All-Product'] = product
+    product_width = 500000000000
+    data['All-Product_range'] = bucket('All-Product', range(0, 3500000000000, product_width),
+                                       span_labels(round(2714705253292.0312), product_width))
+    data['Age Group'] = bucket('Age', range(0, 120, 20), span_labels(83, 20)) # age categories
+    data['BMI_range'] = bucket('Body Mass Index', range(0, 120, 30), span_labels(round(67.1), 30)) # body mass index categories
+    data['BP_range'] = bucket('Blood Pressure', range(0, 200, 50), span_labels(round(122), 50)) # blood pressure categories
+    data['PG_range'] = bucket('Plasma Glucose', range(0, 28, 7), span_labels(round(17), 7)) # plasma glucose categories
+    # Drop the columns that are no longer used
+    data.drop(columns=['Blood Pressure', 'Age', 'Body Mass Index', 'Plasma Glucose',
+                       'All-Product', 'Blood Work Result-3', 'Blood Work Result-2'],
+              inplace=True)
+def combine_cats_nums(transformed_data, full_pipeline):
+    """Wrap the transformer's output in a DataFrame with readable column names.
+    Args:
+        transformed_data: 2-D array of transformed values; its columns are assumed to be
+            the two numerical features followed by the encoded categorical features.
+        full_pipeline: fitted transformer exposing
+            ``named_transformers_['categorical']['cat_encoder'].get_feature_names()``.
+    Returns:
+        DataFrame of the transformed values, with the encoder's ``x0_0`` / ``x0_1``
+        columns renamed to ``Insurance_0`` / ``Insurance_1``.
+    """
+    cat_features = full_pipeline.named_transformers_['categorical']['cat_encoder'].get_feature_names() # names from the categorical transformer
+    num_features = ['Blood Work Result-1', 'Blood Work Result-4']
+    columns_ = np.concatenate([num_features, cat_features]) # numerical names first, then categorical
+    prepared_data = pd.DataFrame(transformed_data, columns=columns_)
+    prepared_data = prepared_data.rename(columns={'x0_0':'Insurance_0', 'x0_1': 'Insurance_1'})
+    # Hand the frame back to the caller; without this the function always yielded None
+    return prepared_data
+def make_prediction(data, transformer, model):
+    """Run the inference path on *data* and return ``(labels, highest_probability)``.
+    The incoming columns are renamed by position to the names from ``return_columns()``,
+    the engineered features are added to the renamed frame, the frame is passed through
+    ``transformer.transform`` and the result is given to ``model.predict`` and
+    ``model.predict_proba``.
+    """
+    renamed = data.rename(columns=dict(zip(data.columns, return_columns())))
+    feature_engineering(renamed) # adds the engineered features to `renamed` in place
+    transformed = transformer.transform(renamed)
+    combine_cats_nums(transformed, transformer) # result is not used for the prediction
+    predicted = model.predict(transformed)
+    probabilities = model.predict_proba(transformed)
+    # NOTE(review): .max() without an axis yields one scalar across all rows and classes —
+    # confirm a per-row maximum is not what batch callers need.
+    return predicted, probabilities.max()
+# Translate a row's numeric 'Predicted Label' into a human-readable sepsis status
+def process_label(row):
+    """Return the status text for ``row['Predicted Label']`` (1 or 0), ``None`` for anything else."""
+    outcome = row['Predicted Label']
+    if outcome == 1:
+        return 'Sepsis status is Positive'
+    if outcome == 0:
+        return 'Sepsis status is Negative'
+    return None
+def return_columns():
+    """Return a new list with the nine column names applied, by position, to incoming data."""
+    return [
+        'Plasma Glucose',
+        'Blood Work Result-1',
+        'Blood Pressure',
+        'Blood Work Result-2',
+        'Blood Work Result-3',
+        'Body Mass Index',
+        'Blood Work Result-4',
+        'Age',
+        'Insurance',
+    ]
+def process_json_csv(contents, file_type, valid_formats):
+    """Parse an uploaded CSV or JSON payload into a DataFrame with the model's column names.
+    Args:
+        contents: raw bytes of the uploaded file.
+        file_type: content type of the upload.
+        valid_formats: sequence whose first entry is the CSV content type and whose
+            second entry is the JSON content type; the position selects the parser.
+    Returns:
+        DataFrame with any ``ID`` column removed and the remaining columns renamed,
+        by position, to the names from ``return_columns()``.
+    Raises:
+        ValueError: if *file_type* matches neither of the two entries.
+    """
+    text = contents.decode()  # Decode the byte string to a regular string
+    if file_type == valid_formats[0]:
+        data = pd.read_csv(StringIO(text))
+    elif file_type == valid_formats[1]:
+        # Pass a file-like object: newer pandas deprecates literal JSON strings here
+        data = pd.read_json(StringIO(text))
+    else:
+        # Fail with a clear message instead of an UnboundLocalError further down
+        raise ValueError(f'Unsupported file type: {file_type!r}')
+    # Drop the identifier column when present; uploads without it are accepted as they are
+    data = data.drop(columns=['ID'], errors='ignore')
+    dict_new_old_cols = dict(zip(data.columns, return_columns())) # old name -> new name, by position
+    data = data.rename(columns=dict_new_old_cols)
+    return data
+def output_batch(data1, data2):
+    """Pair every input row with its output row for the batch response.
+    Args:
+        data1: DataFrame of input records.
+        data2: DataFrame of outputs, row-aligned with *data1*.
+    Returns:
+        ``{'results': [{'input': {...}, 'output': {...}}, ...]}`` where each inner dict
+        is keyed by the frame's own column names. Rows are paired in order; pairing
+        stops at the shorter frame.
+    """
+    # to_dict('records') keeps column names verbatim, whereas itertuples()._asdict()
+    # renames columns that are not valid identifiers ('Predicted Label' became '_0').
+    input_rows = data1.to_dict('records')
+    output_rows = data2.to_dict('records')
+    results_list = [
+        {'input': row_in, 'output': row_out}
+        for row_in, row_out in zip(input_rows, output_rows)
+    ]
+    return {'results': results_list}