ishantvivek committed on
Commit dd81387 · 1 Parent(s): 2aad768

recommendation-system: Add backend server of fastapi and controllers

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.ipynb filter=lfs diff=lfs merge=lfs -text
+ pickles/* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,25 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *$py.class
+
+ # VS Code files
+ .vscode
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
Amazon_Apparel_Recommendations.ipynb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:70d5b298fb21eb86b9f23b1b9f470a0fb4003d2f2ed76dd022b36ac7cca1b26a
+ size 15872682
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.10
+
+ RUN useradd -m -u 1000 user
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+
+ CMD ["uvicorn", "index:app", "--host", "0.0.0.0", "--port", "7860"]
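
Assuming the image is built and run with the port above published on localhost (a local-testing assumption, not part of the commit), a minimal Python smoke test can confirm the app came up and which routes were registered:

import requests

# FastAPI serves the schema at /openapi.json by default; listing its paths
# confirms the controller routes were wired in.
resp = requests.get("http://localhost:7860/openapi.json")
print(resp.status_code, list(resp.json().get("paths", {}).keys()))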
Generate_Pickles.ipynb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0915000c7a35d57b93bef755d1d30394bd04df800c532a230a5de7ed0f662645
+ size 24722
controllers/__init__.py ADDED
@@ -0,0 +1,6 @@
+ import os
+ from fastapi_router_controller import ControllerLoader
+
+ this_dir = os.path.dirname(__file__)
+
+ ControllerLoader.load(this_dir, __package__)
controllers/recommendation_controller.py ADDED
@@ -0,0 +1,158 @@
+ import asyncio
+ from fastapi.responses import PlainTextResponse
+ from fastapi_router_controller import Controller
+ from fastapi import APIRouter, File, UploadFile, HTTPException
+ from utils.BagOfWords import bag_of_words_model
+ from utils.cnn import extract_features_from_image, get_similar_products_cnn
+ from utils.logger import Logger
+ from utils.tfidf import tfidf_model
+ import shutil
+ import os
+
+ logger = Logger.get_logger(__name__)
+
+ router = APIRouter(prefix='/v1')
+ controller = Controller(router, openapi_tag={
+     'name': 'Recommendation System',
+ })
+
+
+ @controller.use()
+ @controller.resource()
+ class RecommendationController():
+     def __init__(self):
+         pass
+     @controller.route.get(
+         '/recommend/bog',
+         tags=['recommend-apparel'],
+         summary='Recommends the apparel')
+     async def recommend(self, input: str):
+         try:
+             if not input:
+                 logger.error('Input is required.')
+                 raise HTTPException(
+                     status_code=500, detail='Input is required.')
+
+             results = bag_of_words_model(input, 5)
+             return {"results": results}
+         except asyncio.CancelledError:
+             logger.error(
+                 'Canceling network request due to disconnect in client.')
+         except Exception as error:
+             logger.error('Error {}'.format(error))
+
+     @controller.route.get(
+         '/recommend/tfidf',
+         tags=['recommend-apparel'],
+         summary='Recommends the apparel')
+     async def recommend(self, input: str):
+         try:
+             if not input:
+                 logger.error('Input is required.')
+                 raise HTTPException(
+                     status_code=500, detail='Input is required.')
+
+             results = tfidf_model(input, 5)
+             return {"results": results}
+         except asyncio.CancelledError:
+             logger.error(
+                 'Canceling network request due to disconnect in client.')
+         except Exception as error:
+             logger.error('Error {}'.format(error))
+
+     @controller.route.post(
+         '/recommend/cnn',
+         tags=['recommend-apparel'],
+         summary='Recommends the apparel')
+     async def recommend(self, file: UploadFile = File(...)):
+         try:
+             UPLOAD_FOLDER = 'uploads/'
+             # Save the uploaded file
+             file_path = os.path.join(UPLOAD_FOLDER, file.filename)
+             with open(file_path, "wb") as buffer:
+                 shutil.copyfileobj(file.file, buffer)
+
+             # Process the uploaded image
+             with open(file_path, "rb") as img_file:
+                 image_bytes = img_file.read()
+
+             # Log image size for debugging
+             if len(image_bytes) == 0:
+                 raise HTTPException(status_code=400, detail="Uploaded image is empty.")
+             else:
+                 print(f"Image size: {len(image_bytes)} bytes")
+
+             # Extract features from the image
+             image_features = extract_features_from_image(image_bytes)
+
+             results = get_similar_products_cnn(image_features, 5)
+             return {"results": results}
+         except asyncio.CancelledError:
+             logger.error(
+                 'Canceling network request due to disconnect in client.')
+         except Exception as error:
+             logger.error('Error {}'.format(error))
+
+     @controller.route.get(
+         '/deleteUpload')
+     async def recommend(self, password: str):
+         try:
+             if not password:
+                 logger.error('Password is required.')
+                 raise HTTPException(
+                     status_code=500, detail='Password is required.')
+
+             if password != "1328":
+                 return {"results": "Unauthorized: Incorrect password"}
+
+             UPLOAD_FOLDER = 'uploads/'
+             # Check if the uploads folder exists
+             if not os.path.exists(UPLOAD_FOLDER):
+                 raise HTTPException(status_code=404, detail="Uploads folder does not exist")
+             # List all files in the uploads folder
+             files = os.listdir(UPLOAD_FOLDER)
+             if not files:
+                 return {"results": "No files to delete"}
+             # Delete all files in the uploads folder
+             for file in files:
+                 file_path = os.path.join(UPLOAD_FOLDER, file)
+                 try:
+                     # Check if it is a file before trying to delete it
+                     if os.path.isfile(file_path):
+                         os.remove(file_path)
+                         print(f"Deleted: {file_path}")
+                     else:
+                         print(f"Skipping directory: {file_path}")
+                 except Exception as e:
+                     print(f"Error deleting file {file_path}: {str(e)}")
+
+             return {"results": "All files have been deleted successfully."}
+         except asyncio.CancelledError:
+             logger.error(
+                 'Canceling network request due to disconnect in client.')
+         except Exception as error:
+             logger.error('Error {}'.format(error))
+
+     @controller.route.get(
+         '/readLogs')
+     async def readLogs(self):
+         try:
+             # Check if the log file exists
+             LOG_FILE_PATH = 'logs.log'
+             if not os.path.exists(LOG_FILE_PATH):
+                 raise HTTPException(status_code=404, detail="Log file not found")
+
+             # Read the log file content
+             try:
+                 with open(LOG_FILE_PATH, 'r') as log_file:
+                     logs_content = log_file.read()
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=f"Error reading log file: {str(e)}")
+
+             # Return the log content as plain text
+             return PlainTextResponse(content=logs_content)
+         except asyncio.CancelledError:
+             logger.error(
+                 'Canceling network request due to disconnect in client.')
+         except Exception as error:
+             logger.error('Error {}'.format(error))
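
A sketch of how a client might exercise these routes once the server is up. The base URL matches the port in the Dockerfile CMD but is otherwise an assumption, and the query text and image path are placeholders:

import requests

BASE_URL = "http://localhost:7860"  # assumed local deployment

# Text query against the bag-of-words route
r = requests.get(f"{BASE_URL}/v1/recommend/bog", params={"input": "women's cotton top"})
print(r.json())

# Image query against the CNN route (multipart upload, field name 'file')
with open("sample_apparel.jpg", "rb") as f:  # placeholder image path
    r = requests.post(f"{BASE_URL}/v1/recommend/cnn", files={"file": f})
print(r.json())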
index.py ADDED
@@ -0,0 +1,21 @@
+ import controllers
+
+ from fastapi import FastAPI
+ from fastapi_router_controller import Controller, ControllersTags
+
+ from utils.config import Config
+ from utils.middleware import LogIncomingRequest
+ from utils.middleware.request_cancellation import RequestCancellation
+
+ #########################################
+ #### Configure the main application #####
+ #########################################
+ app = FastAPI(
+     title='{}'.format(Config.read('app', 'name')),
+     openapi_tags=ControllersTags)
+
+ app.add_middleware(LogIncomingRequest)
+ app.add_middleware(RequestCancellation)
+
+ for router in Controller.routers():
+     app.include_router(router)
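
For an in-process check of this wiring (controller routers plus both middlewares), FastAPI's TestClient can drive the app directly. This is only a sketch: it assumes the pickled dataset and .npy feature files referenced in utils/config/properties.ini exist locally, because the utils modules load them at import time.

from fastapi.testclient import TestClient
from index import app

client = TestClient(app)
resp = client.get("/v1/recommend/tfidf", params={"input": "blue denim shirt"})
print(resp.status_code, resp.json())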
logs.log ADDED
@@ -0,0 +1,79 @@
+ 2025-01-20T00:26:48.930 INFO utils.middleware.log_incoming_request - None - start
+ 2025-01-20T00:26:48.930 INFO utils.middleware.log_incoming_request - None - end in time (ms): 0.00
+ 2025-01-20T00:26:48.930 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:27:28.156 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:27:28.171 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 15.58
+ 2025-01-20T00:27:28.171 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:28:04.793 INFO utils.middleware.log_incoming_request - None - start
+ 2025-01-20T00:28:04.793 INFO utils.middleware.log_incoming_request - None - end in time (ms): 0.00
+ 2025-01-20T00:28:04.815 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:28:24.316 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:28:24.316 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 0.00
+ 2025-01-20T00:28:24.316 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:28:27.607 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:28:27.623 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 15.92
+ 2025-01-20T00:28:27.623 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:28:56.577 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:28:56.577 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 0.00
+ 2025-01-20T00:28:56.577 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:30:32.226 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:30:32.242 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 15.63
+ 2025-01-20T00:30:32.242 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:30:38.842 INFO utils.middleware.log_incoming_request - None - start
+ 2025-01-20T00:30:38.842 INFO utils.middleware.log_incoming_request - None - end in time (ms): 0.00
+ 2025-01-20T00:30:38.842 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:31:00.241 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:31:28.075 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 27834.65
+ 2025-01-20T00:31:28.075 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:35:59.867 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:36:03.194 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 3326.88
+ 2025-01-20T00:36:03.210 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:43:36.816 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:43:36.842 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 24.15
+ 2025-01-20T00:43:36.842 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:43:45.701 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:43:45.704 ERROR controllers.recommendation_controller - Error 401: Unauthorized: Incorrect password
+ 2025-01-20T00:43:45.705 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 3.01
+ 2025-01-20T00:43:45.715 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:43:52.867 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:44:04.109 ERROR controllers.recommendation_controller - Error 401: Unauthorized: Incorrect password
+ 2025-01-20T00:44:04.134 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 11267.24
+ 2025-01-20T00:44:04.141 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:44:21.927 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:44:24.535 ERROR controllers.recommendation_controller - Error 401: Unauthorized: Incorrect password
+ 2025-01-20T00:44:24.535 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 2607.53
+ 2025-01-20T00:44:24.535 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:45:10.763 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:45:15.836 ERROR controllers.recommendation_controller - Error 401: Unauthorized: Incorrect password
+ 2025-01-20T00:45:15.861 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 5097.82
+ 2025-01-20T00:45:15.871 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:45:22.052 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:45:29.665 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 7612.25
+ 2025-01-20T00:45:29.671 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:46:18.791 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:46:41.889 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 23098.92
+ 2025-01-20T00:46:41.905 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:46:50.386 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:46:53.681 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 3295.55
+ 2025-01-20T00:46:53.697 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:47:04.192 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:47:04.196 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 3.72
+ 2025-01-20T00:47:04.196 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:47:09.505 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:47:09.513 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 8.01
+ 2025-01-20T00:47:09.519 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:50:05.276 INFO utils.middleware.log_incoming_request - readLogs - start
+ 2025-01-20T00:50:05.546 INFO utils.middleware.log_incoming_request - readLogs - end in time (ms): 270.15
+ 2025-01-20T00:50:05.556 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:50:10.956 INFO utils.middleware.log_incoming_request - readLogs - start
+ 2025-01-20T00:50:10.966 INFO utils.middleware.log_incoming_request - readLogs - end in time (ms): 10.31
+ 2025-01-20T00:50:10.980 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:50:13.188 INFO utils.middleware.log_incoming_request - readLogs - start
+ 2025-01-20T00:50:13.206 INFO utils.middleware.log_incoming_request - readLogs - end in time (ms): 17.76
+ 2025-01-20T00:50:13.212 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:50:34.126 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:50:34.136 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 10.13
+ 2025-01-20T00:50:34.143 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
+ 2025-01-20T00:50:39.423 INFO utils.middleware.log_incoming_request - recommend - start
+ 2025-01-20T00:50:39.473 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 49.81
+ 2025-01-20T00:50:39.473 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
pickles/16k_apparrel_data ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b22ccca564ec344593b30c8b4657493693e56d80e214ea5b04380eb7d33cae45
+ size 3243088
pickles/17k_apparrel_data ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eae9951b598965b93e5042e5fe576a2dd92826e4a4c16de4659c8ae5afe542c2
+ size 14642960
pickles/28k_apparel_data ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58ef313a0b016e5914bf1cb9990c502230821585cc702b55f2ea04a1c2cb4f55
+ size 24447034
requirement.txt ADDED
@@ -0,0 +1,12 @@
+ fastapi
+ fastapi-router-controller
+ Pillow
+ matplotlib
+ nltk
+ seaborn
+ scikit-learn
+ plotly
+ gensim
+ keras
+ tensorflow
+ python-multipart
utils/BagOfWords.py ADDED
@@ -0,0 +1,51 @@
+ import numpy as np
+ import pandas as pd
+ from sklearn.metrics.pairwise import pairwise_distances
+ from sklearn.feature_extraction.text import CountVectorizer
+ from typing import List, Dict
+ import os
+
+ from utils.config import Config
+
+ # Load the dataset (replace with the actual path to your dataset)
+ dataset_path = Config.read('app', 'dataset')
+
+ # Ensure the dataset exists
+ if not os.path.exists(dataset_path):
+     raise FileNotFoundError(f"The dataset file at {dataset_path} was not found.")
+
+ # Load the dataset
+ data = pd.read_pickle(dataset_path)
+
+ # Ensure the dataset has the necessary columns: 'asin', 'title', 'brand', 'medium_image_url'
+ required_columns = ['asin', 'title', 'brand', 'medium_image_url']
+ for col in required_columns:
+     if col not in data.columns:
+         raise ValueError(f"Missing required column: {col} in the dataset")
+
+ # Set up the vectorizer and fit the model
+ title_vectorizer = CountVectorizer()
+ title_features = title_vectorizer.fit_transform(data['title'])
+
+ # Function to calculate the bag-of-words model and return closest matches
+ def bag_of_words_model(query: str, num_results: int) -> List[Dict]:
+     # Transform the input query to the same feature space
+     query_vec = title_vectorizer.transform([query])
+
+     # Calculate pairwise distances between the query and all items in the corpus
+     pairwise_dist = pairwise_distances(title_features, query_vec, metric='cosine')
+
+     # Get the indices of the closest matches
+     indices = np.argsort(pairwise_dist.flatten())[0:num_results]
+
+     results = []
+     for idx in indices:
+         result = {
+             'asin': data['asin'].iloc[idx],
+             'brand': data['brand'].iloc[idx],
+             'title': data['title'].iloc[idx],
+             'url': data['medium_image_url'].iloc[idx],
+         }
+         results.append(result)
+
+     return results
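
The nearest-title lookup above is easier to see on a toy corpus that does not depend on the pickled dataset; the titles below are made up for illustration:

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import pairwise_distances

titles = ["red cotton shirt", "blue denim jacket", "dark red polo shirt"]  # toy data
vectorizer = CountVectorizer()
features = vectorizer.fit_transform(titles)

query_vec = vectorizer.transform(["red shirt"])
distances = pairwise_distances(features, query_vec, metric='cosine').flatten()
print([titles[i] for i in np.argsort(distances)[:2]])  # two closest titles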
utils/cnn.py ADDED
@@ -0,0 +1,70 @@
+ import numpy as np
+ from sklearn.metrics.pairwise import pairwise_distances
+ from typing import List, Dict
+ from utils.config import Config
+ from PIL import Image
+ import pandas as pd
+ import tensorflow as tf
+ import io
+ import os
+
+ # Load the dataset (replace with the actual path to your dataset)
+ dataset_path = Config.read('app', 'dataset')
+
+ # Ensure the dataset exists
+ if not os.path.exists(dataset_path):
+     raise FileNotFoundError(f"The dataset file at {dataset_path} was not found.")
+
+ # Load the dataset
+ data = pd.read_pickle(dataset_path)
+
+ # Ensure the dataset has the necessary columns: 'asin', 'title', 'brand', 'medium_image_url'
+ required_columns = ['asin', 'title', 'brand', 'medium_image_url']
+ for col in required_columns:
+     if col not in data.columns:
+         raise ValueError(f"Missing required column: {col} in the dataset")
+
+ # Load the pre-trained CNN features and corresponding ASINs
+ bottleneck_features_train = np.load(Config.read('app', 'cnnmodel'))
+ bottleneck_features_train = bottleneck_features_train.astype(np.float64)
+ asins = np.load(Config.read('app', 'cssasins'))
+ asins = list(asins)
+
+
+ # Helper function to extract features from the uploaded image using a pre-trained model
+ def extract_features_from_image(image_bytes):
+     image = Image.open(io.BytesIO(image_bytes))
+     image = image.resize((224, 224))
+     image_array = np.array(image) / 255.0
+     image_array = np.expand_dims(image_array, axis=0)
+
+     # Load the VGG16 model for feature extraction
+     model = tf.keras.applications.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
+     features = model.predict(image_array)
+     features = features.flatten()
+
+     return features
+
+ # Function to get similar products based on CNN features
+ def get_similar_products_cnn(image_features, num_results: int) -> List[Dict]:
+
+     pairwise_dist = pairwise_distances(bottleneck_features_train, image_features.reshape(1, -1))
+
+     # Get the indices of the closest products
+     indices = np.argsort(pairwise_dist.flatten())[0:num_results]
+
+     results = []
+     for i in range(len(indices)):
+         # Get the product details for each closest match
+         product_details = data[['asin', 'brand', 'title', 'medium_image_url']].loc[data['asin'] == asins[indices[i]]]
+         for indx, row in product_details.iterrows():
+             result = {
+                 'asin': row['asin'],
+                 'brand': row['brand'],
+                 'title': row['title'],
+                 'url': row['medium_image_url']
+             }
+             results.append(result)
+
+     return results
utils/config/__init__.py ADDED
@@ -0,0 +1,15 @@
+ import os
+ from pathlib import Path
+ from configparser import ConfigParser
+
+ this_dir = Path(__file__).parent
+ conf_dir = this_dir / 'properties.ini'
+
+ parser = ConfigParser(os.environ)
+ parser.read(conf_dir, encoding="utf8")
+
+
+ class Config():
+     @staticmethod
+     def read(section, property, default=None):
+         return parser.get(section, property) or default
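
Config.read is the lookup used throughout the utils modules; a minimal usage sketch, with values taken from utils/config/properties.ini:

from utils.config import Config

dataset_path = Config.read('app', 'dataset')             # 'pickles/16k_apparrel_data'
log_level = Config.read('log', 'level', default='INFO')

One caveat: ConfigParser.get raises NoSectionError/NoOptionError when a key is absent (unless it resolves from the environment defaults passed to the parser), so the default argument only applies to options that exist but are empty.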
utils/config/properties.ini ADDED
@@ -0,0 +1,15 @@
+ [app]
+ name=Apparel Recommendation System
+ env=%(ENV)
+ dataset=pickles/16k_apparrel_data
+ cnnmodel=models/16k_data_cnn_features.npy
+ cssasins=models/16k_data_cnn_feature_asins.npy
+
+ # we are currently using this model
+ model=ibm-granite/granite-3b-code-base
+
+ [log]
+ level=INFO
+ filename=./logs.log
+ dateformat=%%Y-%%m-%%dT%%H:%%M:%%S
+ format=%%(asctime)s.%%(msecs)03d %%(levelname)5s %%(name)s - %%(message)s
utils/logger.py ADDED
@@ -0,0 +1,38 @@
+ import logging
+
+ from utils.config import Config
+
+
+ class Logger():
+     CONFIG_KEY = 'log'
+
+     @staticmethod
+     def get_level():
+         return Config.read(Logger.CONFIG_KEY, 'level')
+
+     @staticmethod
+     def get_filename():
+         return Config.read(Logger.CONFIG_KEY, 'filename')
+
+     @staticmethod
+     def get_format():
+         return Config.read(Logger.CONFIG_KEY, 'format')
+
+     @staticmethod
+     def get_date_format():
+         return Config.read(Logger.CONFIG_KEY, 'dateformat')
+
+     @staticmethod
+     def get_logger(name):
+         logger = logging.getLogger(name)
+         logger.setLevel(Logger.get_level())  # type: ignore
+
+         formatter = logging.Formatter(
+             Logger.get_format(),
+             Logger.get_date_format())
+
+         file_hdlr = logging.FileHandler(Logger.get_filename())  # type: ignore
+         file_hdlr.setFormatter(formatter)
+         logger.addHandler(hdlr=file_hdlr)
+
+         return logger
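
Because get_logger attaches a new FileHandler on every call, requesting the same logger name twice would duplicate each log line. A self-contained defensive variant (a sketch, not part of the commit) guards against that:

import logging

def get_logger(name, filename='logs.log', level='INFO'):
    logger = logging.getLogger(name)
    logger.setLevel(level)
    if not logger.handlers:  # only attach a handler the first time
        handler = logging.FileHandler(filename)
        handler.setFormatter(logging.Formatter(
            '%(asctime)s.%(msecs)03d %(levelname)5s %(name)s - %(message)s',
            '%Y-%m-%dT%H:%M:%S'))
        logger.addHandler(handler)
    return logger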
utils/middleware/__init__.py ADDED
@@ -0,0 +1,5 @@
+ from utils.middleware.log_incoming_request import LogIncomingRequest
+
+ __all__ = [
+     'LogIncomingRequest'
+ ]
utils/middleware/log_incoming_request.py ADDED
@@ -0,0 +1,32 @@
+ import time
+ from typing import List
+ from fastapi import Request
+ from fastapi.routing import APIRoute
+ from starlette.middleware.base import BaseHTTPMiddleware
+ from utils.logger import Logger
+
+ logger = Logger.get_logger(__name__)
+
+
+ class LogIncomingRequest(BaseHTTPMiddleware):
+     def __get_request_handler(_, req: Request):  # type: ignore
+         # get controller from request
+         routes: List[APIRoute] = req.app.routes
+         for route in routes:
+             if route.path_regex.match(req.url.path) and req.method in route.methods:
+                 return route.endpoint.__name__ if hasattr(route.endpoint, '__name__') else 'fastapi_core'
+
+     async def dispatch(self, request: Request, call_next):
+         func_name = self.__get_request_handler(request)
+         request.state.func_name = func_name
+
+         logger.info('{} - start'.format(func_name))
+         start_time = time.time()
+
+         response = await call_next(request)
+
+         process_time = (time.time() - start_time) * 1000
+         formatted_process_time = '{0:.2f}'.format(process_time)
+         logger.info('{} - end in time (ms): {}'.format(func_name,
+                                                        formatted_process_time))
+         return response
utils/middleware/request_cancellation.py ADDED
@@ -0,0 +1,39 @@
+ import asyncio
+ from utils.logger import Logger
+
+ logger = Logger.get_logger(__name__)
+
+
+ class RequestCancellation:
+     """
+     RequestCancellation middleware handles request canceling
+     * In case of API routes where very frequent/expensive requests are made.
+     """
+
+     def __init__(self, app):
+         self.app = app
+
+     async def __call__(self, scope, receive, send):
+         if scope["type"] != "http":
+             await self.app(scope, receive, send)
+             return
+
+         queue = asyncio.Queue()
+
+         async def message_poller(sentinel, handler_task):
+             nonlocal queue
+             while True:
+                 message = await receive()
+                 if message["type"] == "http.disconnect":
+                     handler_task.cancel()
+                     return sentinel
+                 await queue.put(message)
+
+         sentinel = object()
+         handler_task = asyncio.create_task(self.app(scope, queue.get, send))
+         asyncio.create_task(message_poller(sentinel, handler_task))
+
+         try:
+             return await handler_task
+         except asyncio.CancelledError:
+             logger.info('Task Cancellatation Requested.')
utils/tfidf.py ADDED
@@ -0,0 +1,54 @@
+ import numpy as np
+ import pandas as pd
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import pairwise_distances
+ from typing import List, Dict
+ from utils.config import Config
+ import os
+
+
+ # Load the dataset (replace with the actual path to your dataset)
+ dataset_path = Config.read('app', 'dataset')
+
+ # Ensure the dataset exists
+ if not os.path.exists(dataset_path):
+     raise FileNotFoundError(f"The dataset file at {dataset_path} was not found.")
+
+ # Load the dataset
+ data = pd.read_pickle(dataset_path)
+
+ # Ensure the dataset has the necessary columns: 'asin', 'title', 'brand', 'medium_image_url'
+ required_columns = ['asin', 'title', 'brand', 'medium_image_url']
+ for col in required_columns:
+     if col not in data.columns:
+         raise ValueError(f"Missing required column: {col} in the dataset")
+
+ # Set up the vectorizer and fit the model
+ tfidf_title_vectorizer = TfidfVectorizer(min_df=0.0)
+ tfidf_title_features = tfidf_title_vectorizer.fit_transform(data['title'])
+
+ # Function to calculate the tf-idf model and return closest matches
+ def tfidf_model(input_text: str, num_results: int) -> List[Dict]:
+
+     # Transform the input text to the same TF-IDF feature space
+     query_vec = tfidf_title_vectorizer.transform([input_text])
+
+     pairwise_dist = pairwise_distances(tfidf_title_features, query_vec)
+
+     # np.argsort returns the indices of the num_results smallest distances
+     indices = np.argsort(pairwise_dist.flatten())[0:num_results]
+
+     # data frame indices of the smallest distances
+     df_indices = list(data.index[indices])
+
+     results = []
+     for i in range(0, len(indices)):
+         result = {
+             'asin': data['asin'].loc[df_indices[i]],
+             'brand': data['brand'].loc[df_indices[i]],
+             'title': data['title'].loc[df_indices[i]],
+             'url': data['medium_image_url'].loc[df_indices[i]]
+         }
+         results.append(result)
+
+     return results
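
Unlike BagOfWords.py, this module calls pairwise_distances with its default Euclidean metric rather than cosine. Since TfidfVectorizer L2-normalises its rows by default, the two metrics rank results identically here (for unit vectors, ||a - b||^2 = 2(1 - cos(a, b))); a toy check with made-up titles:

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import pairwise_distances

titles = ["red cotton shirt", "blue denim jacket", "dark red polo shirt"]
vec = TfidfVectorizer()                      # norm='l2' by default
feats = vec.fit_transform(titles)
q = vec.transform(["red shirt"])

euclidean = pairwise_distances(feats, q).flatten()
cosine = pairwise_distances(feats, q, metric='cosine').flatten()
print([titles[i] for i in np.argsort(euclidean)])
print([titles[i] for i in np.argsort(cosine)])   # same ordering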