bright1 committed
Commit 7caf0d4
1 Parent(s): 99146a9

Added api file

Dockerfile ADDED
@@ -0,0 +1,17 @@
+ # Start from an official Python 3.9 base image
+ FROM python:3.9
+
+ # Set the working directory inside the container
+ WORKDIR /code
+
+ # Copy the dependency list first to take advantage of Docker layer caching
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install the Python dependencies
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Copy the application source code
+ COPY ./src /code/src
+
+ # Serve the FastAPI app with uvicorn on port 7860
+ CMD ["uvicorn", "src.app.app:app", "--host", "0.0.0.0", "--port", "7860"]
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ matplotlib==3.3.4
+ numpy==1.22.4
+ pandas==1.2.4
+ scipy==1.6.2
+ seaborn==0.11.1
+ scikit-learn==0.24.1
+ xgboost==1.7.3
+ streamlit==1.23.1
+ fastapi[all]==0.98.0
+ uvicorn[standard]==0.22.0
src/__init__.py ADDED
File without changes
src/app/app.py ADDED
@@ -0,0 +1,77 @@
+ from fastapi import FastAPI
+ import uvicorn
+ import os
+ import sys
+ import datetime
+ import pandas as pd
+ from typing import Annotated
+
+ # Make the project root importable so that `src` resolves
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+ from src.utils import load_file, make_predcition, date_extracts
+
+
+ # Create an instance of FastAPI
+ app = FastAPI(debug=True)
+
+ # Get the absolute path of this file's directory
+ DIRPATH = os.path.dirname(os.path.realpath(__file__))
+
+ # Set the path to the pickled ML components
+ ml_contents_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'toolkit_folder')
+
+ # Load the encoder, model, and feature names
+ ml_contents = load_file(ml_contents_path)
+
+ Encoder = ml_contents["OneHotEncoder"]
+ model = ml_contents["model"]
+ features_ = ml_contents['feature_names']
+
+
+ # Define endpoints
+
+ @app.get('/')
+ def root():
+     return 'Welcome to the Grocery Sales Forecasting API'
+
+ @app.get('/health')
+ def check_health():
+     return {'status': 'ok'}
+
+ @app.post('/predict')
+ async def predict_sales(store_id: int, category_id: int, onpromotion: int,
+                         city: str, store_type: int, cluster: int,
+                         date_: Annotated[datetime.date, "The date of sales"] = datetime.date.today()):
+
+     # Create a dictionary of inputs
+     input = {
+         'store_id': [store_id],
+         'category_id': [category_id],
+         'onpromotion': [onpromotion],
+         'type': [store_type],
+         'cluster': [cluster],
+         'city': [city],
+         'date_': [date_]
+     }
+
+     # Convert to a dataframe and extract datetime features
+     input_data = pd.DataFrame(input)
+     date_extracts(input_data)
+
+     # Make a prediction on the prepared dataframe
+     sales = make_predcition(Encoder, model, input_data)
+     sales_value = float(sales[0])
+     return {'sales': sales_value}
+
+
+ if __name__ == "__main__":
+     uvicorn.run('app:app', reload=True)
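A minimal client-side sketch of calling the /predict endpoint once the container is up. It assumes the service is reachable on port 7860 as configured in the Dockerfile and that the `requests` package is available on the client; FastAPI reads these simple-typed parameters from the query string, and the values below are illustrative only:

    # illustrative request; parameter values are made up
    import requests

    params = {
        'store_id': 1,
        'category_id': 10,
        'onpromotion': 0,
        'city': 'Accra',
        'store_type': 1,
        'cluster': 3,
        'date_': '2023-06-01',
    }
    response = requests.post('http://localhost:7860/predict', params=params)
    print(response.json())  # e.g. {'sales': <forecasted value>}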
src/assets/ml_components/toolkit_folder ADDED
Binary file (221 kB)
 
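The pickled toolkit is expected to be a dictionary keyed by "OneHotEncoder", "model", and "feature_names", since those are the entries read in src/app/app.py. A hedged sketch of how such a file could be written, using unfitted placeholders rather than the actual artifacts shipped in this commit:

    # illustrative only: the real file holds a fitted encoder and a trained model
    import pickle
    from sklearn.preprocessing import OneHotEncoder
    from xgboost import XGBRegressor

    toolkit = {
        'OneHotEncoder': OneHotEncoder(),   # placeholder, unfitted
        'model': XGBRegressor(),            # placeholder, untrained
        'feature_names': ['store_id', 'category_id', 'onpromotion'],  # truncated example
    }
    with open('toolkit_folder', 'wb') as f:
        pickle.dump(toolkit, f)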
src/utils.py ADDED
@@ -0,0 +1,71 @@
+ import pandas as pd
+ import pickle
+ from functools import lru_cache
+
+
+ def return_features():
+     # Full list of feature names (including the one-hot encoded city columns) expected by the model
+     features_ = ['store_id', 'category_id', 'onpromotion', 'type', 'cluster', 'year', 'month', 'dayofmonth', 'dayofweek', 'dayofyear', 'weekofyear', 'quarter', 'is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end', 'year_weekofyear', 'x0_Accra', 'x0_Aflao', 'x0_Akim Oda', 'x0_Akwatia', 'x0_Bekwai', 'x0_Cape coast', 'x0_Elmina,', 'x0_Gbawe', 'x0_Ho', 'x0_Hohoe', 'x0_Kintampo', 'x0_Koforidua', 'x0_Kumasi', 'x0_Mampong', 'x0_Obuasi', 'x0_Prestea', 'x0_Suhum', 'x0_Tamale', 'x0_Techiman', 'x0_Tema', 'x0_Teshie', 'x0_Winneba']
+     return features_
+
+
+ @lru_cache(maxsize=100)
+ def load_file(filename):
+     # Read and unpickle the file contents (cached so the toolkit is only loaded once)
+     with open(filename, 'rb') as file:
+         contents = pickle.load(file)
+     return contents
+
+
+ def date_extracts(df):
+     # Extract calendar features from the 'date_' column, then drop the raw column
+     df['date_'] = pd.to_datetime(df['date_'], errors='coerce')
+     df['year'] = df['date_'].dt.year
+     df['month'] = df['date_'].dt.month
+     df['dayofmonth'] = df['date_'].dt.day
+     df['dayofweek'] = df['date_'].dt.dayofweek
+     df['dayofyear'] = df['date_'].dt.dayofyear
+     df['weekofyear'] = df['date_'].dt.weekofyear
+     df['quarter'] = df['date_'].dt.quarter
+     df['is_month_start'] = df['date_'].dt.is_month_start.astype(int)
+     df['is_month_end'] = df['date_'].dt.is_month_end.astype(int)
+     df['is_quarter_start'] = df['date_'].dt.is_quarter_start.astype(int)
+     df['is_quarter_end'] = df['date_'].dt.is_quarter_end.astype(int)
+     df['is_year_start'] = df['date_'].dt.is_year_start.astype(int)
+     df['is_year_end'] = df['date_'].dt.is_year_end.astype(int)
+     df['year_weekofyear'] = ((df['year'] - 2017) * 100) + df['weekofyear']
+
+     df.drop(columns=['date_'], inplace=True)
+
+
+ def make_predcition(Encoder, model, input_df):
+     # Accept either a dict of inputs or a ready-made dataframe
+     if isinstance(input_df, dict):
+         input_data = pd.DataFrame(input_df)
+     elif isinstance(input_df, pd.DataFrame):
+         input_data = input_df
+
+     # One-hot encode the categorical 'city' column
+     col = ['city']
+     encoded_cat = Encoder.transform(input_data[col])
+     encoded_cols = Encoder.get_feature_names()
+     encoded_cat_ = pd.DataFrame(encoded_cat, columns=encoded_cols)
+
+     # Drop the raw categorical column before concatenating the encoded columns
+     train_enc = input_data.drop(['city'], axis=1)
+     input_d = pd.concat([train_enc, encoded_cat_], axis=1)
+
+     # Align columns with the order the model was trained on
+     input_d = input_d.reindex(columns=return_features())
+
+     print(model.get_booster().feature_names)
+     prediction = model.predict(input_d)
+     return prediction
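
A quick local sanity check of date_extracts with made-up values (a sketch, assuming it is run from the repository root so that src.utils is importable); the frame gains the calendar features listed in return_features and the raw 'date_' column is dropped in place:

    # illustrative only
    import pandas as pd
    from src.utils import date_extracts

    sample = pd.DataFrame({
        'store_id': [1],
        'category_id': [10],
        'onpromotion': [0],
        'type': [1],
        'cluster': [3],
        'city': ['Accra'],
        'date_': ['2023-06-01'],
    })
    date_extracts(sample)
    print(sample.columns.tolist())  # now includes 'year', 'month', ..., 'year_weekofyear'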