Spaces:
Sleeping
Sleeping
ishantvivek
committed on
Commit
·
dd81387
1
Parent(s):
2aad768
recommendation-system: Add backend server of fastapi and controllers
Browse files- .gitattributes +2 -0
- .gitignore +25 -0
- Amazon_Apparel_Recommendations.ipynb +3 -0
- Dockerfile +16 -0
- Generate_Pickles.ipynb +3 -0
- controllers/__init__.py +6 -0
- controllers/recommendation_controller.py +158 -0
- index.py +21 -0
- logs.log +79 -0
- pickles/16k_apparrel_data +3 -0
- pickles/17k_apparrel_data +3 -0
- pickles/28k_apparel_data +3 -0
- requirement.txt +12 -0
- utils/BagOfWords.py +51 -0
- utils/cnn.py +70 -0
- utils/config/__init__.py +15 -0
- utils/config/properties.ini +15 -0
- utils/logger.py +38 -0
- utils/middleware/__init__.py +5 -0
- utils/middleware/log_incoming_request.py +32 -0
- utils/middleware/request_cancellation.py +39 -0
- utils/tfidf.py +54 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.ipynb filter=lfs diff=lfs merge=lfs -text
|
37 |
+
pickles/* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*$py.class
|
4 |
+
|
5 |
+
# VS code files
|
6 |
+
.vscode
|
7 |
+
|
8 |
+
# PyInstaller
|
9 |
+
# Usually these files are written by a python script from a template
|
10 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
11 |
+
*.manifest
|
12 |
+
*.spec
|
13 |
+
|
14 |
+
# Jupyter Notebook
|
15 |
+
.ipynb_checkpoints
|
16 |
+
|
17 |
+
|
18 |
+
# Environments
|
19 |
+
.env
|
20 |
+
.venv
|
21 |
+
env/
|
22 |
+
venv/
|
23 |
+
ENV/
|
24 |
+
env.bak/
|
25 |
+
venv.bak/
|
Amazon_Apparel_Recommendations.ipynb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70d5b298fb21eb86b9f23b1b9f470a0fb4003d2f2ed76dd022b36ac7cca1b26a
|
3 |
+
size 15872682
|
Dockerfile
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
2 |
+
# you will also find guides on how best to write your Dockerfile
|
3 |
+
|
4 |
+
FROM python:3.10
|
5 |
+
|
6 |
+
RUN useradd -m -u 1000 user
|
7 |
+
|
8 |
+
WORKDIR /app
|
9 |
+
|
10 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
11 |
+
|
12 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
13 |
+
|
14 |
+
COPY --chown=user . /app
|
15 |
+
|
16 |
+
CMD ["uvicorn", "index:app", "--host", "0.0.0.0", "--port", "7860"]
|
Generate_Pickles.ipynb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0915000c7a35d57b93bef755d1d30394bd04df800c532a230a5de7ed0f662645
|
3 |
+
size 24722
|
controllers/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

from fastapi_router_controller import ControllerLoader

# Directory of this package: ControllerLoader scans it to auto-import
# every controller module so their routes self-register.
this_dir = os.path.dirname(__file__)

ControllerLoader.load(this_dir, __package__)
|
controllers/recommendation_controller.py
ADDED
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
import os
import shutil

from fastapi import APIRouter, File, UploadFile, HTTPException
from fastapi.responses import PlainTextResponse
from fastapi_router_controller import Controller

from utils.BagOfWords import bag_of_words_model
from utils.cnn import extract_features_from_image, get_similar_products_cnn
from utils.logger import Logger
from utils.tfidf import tfidf_model

logger = Logger.get_logger(__name__)

# Filesystem locations shared by the endpoints below.
UPLOAD_FOLDER = 'uploads/'
LOG_FILE_PATH = 'logs.log'

router = APIRouter(prefix='/v1')
controller = Controller(router, openapi_tag={
    'name': 'Recommendation System',
})


@controller.use()
@controller.resource()
class RecommendationController():
    """HTTP endpoints exposing the apparel recommendation models.

    Fixes over the original:
    * every handler had the same name ``recommend`` — in a Python class the
      later definitions silently replace the earlier ones; each handler now
      has a unique name.
    * ``HTTPException`` raised for validation was swallowed by the broad
      ``except Exception`` and the handler returned ``None`` (HTTP 200 with
      a ``null`` body); it is now re-raised so FastAPI emits the intended
      status code, and unexpected errors become an explicit 500.
    * client validation errors use 400 instead of 500.
    """

    @controller.route.get(
        '/recommend/bog',
        tags=['recommend-apparel'],
        summary='Recommends the apparel')
    async def recommend_bag_of_words(self, input: str):
        """Return the top-5 bag-of-words title matches for *input*."""
        try:
            if not input:
                logger.error('Input is required.')
                # 400: the client sent a bad request, not a server fault.
                raise HTTPException(
                    status_code=400, detail='Input is required.')

            results = bag_of_words_model(input, 5)
            return {"results": results}
        except asyncio.CancelledError:
            logger.error(
                'Canceling network request due to disconnect in client.')
        except HTTPException:
            # Let FastAPI translate it into the intended response.
            raise
        except Exception as error:
            logger.error('Error {}'.format(error))
            raise HTTPException(status_code=500, detail=str(error))

    @controller.route.get(
        '/recommend/tfidf',
        tags=['recommend-apparel'],
        summary='Recommends the apparel')
    async def recommend_tfidf(self, input: str):
        """Return the top-5 TF-IDF title matches for *input*."""
        try:
            if not input:
                logger.error('Input is required.')
                raise HTTPException(
                    status_code=400, detail='Input is required.')

            results = tfidf_model(input, 5)
            return {"results": results}
        except asyncio.CancelledError:
            logger.error(
                'Canceling network request due to disconnect in client.')
        except HTTPException:
            raise
        except Exception as error:
            logger.error('Error {}'.format(error))
            raise HTTPException(status_code=500, detail=str(error))

    @controller.route.post(
        '/recommend/cnn',
        tags=['recommend-apparel'],
        summary='Recommends the apparel')
    async def recommend_cnn(self, file: UploadFile = File(...)):
        """Persist the uploaded image and return the 5 most similar items."""
        try:
            # Create the folder on first use so the save cannot fail
            # with a missing-directory error.
            os.makedirs(UPLOAD_FOLDER, exist_ok=True)

            # basename() strips any client-supplied directory components,
            # preventing path traversal via the uploaded filename.
            file_path = os.path.join(
                UPLOAD_FOLDER, os.path.basename(file.filename))
            with open(file_path, "wb") as buffer:
                shutil.copyfileobj(file.file, buffer)

            # Read the saved file back for feature extraction.
            with open(file_path, "rb") as img_file:
                image_bytes = img_file.read()

            if len(image_bytes) == 0:
                raise HTTPException(status_code=400, detail="Uploaded image is empty.")
            logger.info('Image size: %s bytes', len(image_bytes))

            image_features = extract_features_from_image(image_bytes)
            results = get_similar_products_cnn(image_features, 5)
            return {"results": results}
        except asyncio.CancelledError:
            logger.error(
                'Canceling network request due to disconnect in client.')
        except HTTPException:
            raise
        except Exception as error:
            logger.error('Error {}'.format(error))
            raise HTTPException(status_code=500, detail=str(error))

    @controller.route.get(
        '/deleteUpload')
    async def delete_uploads(self, password: str):
        """Delete every file (not subdirectory) in the uploads folder.

        NOTE(security): the password is hard-coded below; move it to
        configuration or an environment variable before real deployment.
        """
        try:
            if not password:
                logger.error('Password is required.')
                raise HTTPException(
                    status_code=400, detail='Password is required.')

            if password != "1328":
                return {"results": "Unauthorized: Incorrect password"}

            if not os.path.exists(UPLOAD_FOLDER):
                raise HTTPException(status_code=404, detail="Uploads folder does not exist")

            entries = os.listdir(UPLOAD_FOLDER)
            if not entries:
                return {"results": "No files to delete"}

            for entry in entries:
                file_path = os.path.join(UPLOAD_FOLDER, entry)
                try:
                    # Only delete plain files; leave subdirectories alone.
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                        logger.info('Deleted: %s', file_path)
                    else:
                        logger.info('Skipping directory: %s', file_path)
                except OSError as e:
                    # Best-effort cleanup: log and continue with the rest.
                    logger.error('Error deleting file %s: %s', file_path, e)

            return {"results": "All files have been deleted successfully."}
        except asyncio.CancelledError:
            logger.error(
                'Canceling network request due to disconnect in client.')
        except HTTPException:
            raise
        except Exception as error:
            logger.error('Error {}'.format(error))
            raise HTTPException(status_code=500, detail=str(error))

    @controller.route.get(
        '/readLogs')
    async def readLogs(self):
        """Return the raw application log file as plain text."""
        try:
            if not os.path.exists(LOG_FILE_PATH):
                raise HTTPException(status_code=404, detail="Log file not found")

            try:
                with open(LOG_FILE_PATH, 'r') as log_file:
                    logs_content = log_file.read()
            except OSError as e:
                raise HTTPException(status_code=500, detail=f"Error reading log file: {str(e)}")

            return PlainTextResponse(content=logs_content)
        except asyncio.CancelledError:
            logger.error(
                'Canceling network request due to disconnect in client.')
        except HTTPException:
            raise
        except Exception as error:
            logger.error('Error {}'.format(error))
            raise HTTPException(status_code=500, detail=str(error))
|
index.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import controllers

from fastapi import FastAPI
from fastapi_router_controller import Controller, ControllersTags

from utils.config import Config
from utils.middleware import LogIncomingRequest
from utils.middleware.request_cancellation import RequestCancellation

# ---------------------------------------
# Configure the main FastAPI application.
# ---------------------------------------
app = FastAPI(
    title='{}'.format(Config.read('app', 'name')),
    openapi_tags=ControllersTags)

# Middleware stack: request/response logging plus cancellation of
# handlers whose clients disconnect mid-request.
app.add_middleware(LogIncomingRequest)
app.add_middleware(RequestCancellation)

# Mount every router that the controllers package registered.
for controller_router in Controller.routers():
    app.include_router(controller_router)
|
logs.log
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2025-01-20T00:26:48.930 INFO utils.middleware.log_incoming_request - None - start
|
2 |
+
2025-01-20T00:26:48.930 INFO utils.middleware.log_incoming_request - None - end in time (ms): 0.00
|
3 |
+
2025-01-20T00:26:48.930 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
4 |
+
2025-01-20T00:27:28.156 INFO utils.middleware.log_incoming_request - recommend - start
|
5 |
+
2025-01-20T00:27:28.171 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 15.58
|
6 |
+
2025-01-20T00:27:28.171 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
7 |
+
2025-01-20T00:28:04.793 INFO utils.middleware.log_incoming_request - None - start
|
8 |
+
2025-01-20T00:28:04.793 INFO utils.middleware.log_incoming_request - None - end in time (ms): 0.00
|
9 |
+
2025-01-20T00:28:04.815 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
10 |
+
2025-01-20T00:28:24.316 INFO utils.middleware.log_incoming_request - recommend - start
|
11 |
+
2025-01-20T00:28:24.316 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 0.00
|
12 |
+
2025-01-20T00:28:24.316 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
13 |
+
2025-01-20T00:28:27.607 INFO utils.middleware.log_incoming_request - recommend - start
|
14 |
+
2025-01-20T00:28:27.623 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 15.92
|
15 |
+
2025-01-20T00:28:27.623 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
16 |
+
2025-01-20T00:28:56.577 INFO utils.middleware.log_incoming_request - recommend - start
|
17 |
+
2025-01-20T00:28:56.577 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 0.00
|
18 |
+
2025-01-20T00:28:56.577 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
19 |
+
2025-01-20T00:30:32.226 INFO utils.middleware.log_incoming_request - recommend - start
|
20 |
+
2025-01-20T00:30:32.242 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 15.63
|
21 |
+
2025-01-20T00:30:32.242 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
22 |
+
2025-01-20T00:30:38.842 INFO utils.middleware.log_incoming_request - None - start
|
23 |
+
2025-01-20T00:30:38.842 INFO utils.middleware.log_incoming_request - None - end in time (ms): 0.00
|
24 |
+
2025-01-20T00:30:38.842 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
25 |
+
2025-01-20T00:31:00.241 INFO utils.middleware.log_incoming_request - recommend - start
|
26 |
+
2025-01-20T00:31:28.075 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 27834.65
|
27 |
+
2025-01-20T00:31:28.075 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
28 |
+
2025-01-20T00:35:59.867 INFO utils.middleware.log_incoming_request - recommend - start
|
29 |
+
2025-01-20T00:36:03.194 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 3326.88
|
30 |
+
2025-01-20T00:36:03.210 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
31 |
+
2025-01-20T00:43:36.816 INFO utils.middleware.log_incoming_request - recommend - start
|
32 |
+
2025-01-20T00:43:36.842 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 24.15
|
33 |
+
2025-01-20T00:43:36.842 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
34 |
+
2025-01-20T00:43:45.701 INFO utils.middleware.log_incoming_request - recommend - start
|
35 |
+
2025-01-20T00:43:45.704 ERROR controllers.recommendation_controller - Error 401: Unauthorized: Incorrect password
|
36 |
+
2025-01-20T00:43:45.705 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 3.01
|
37 |
+
2025-01-20T00:43:45.715 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
38 |
+
2025-01-20T00:43:52.867 INFO utils.middleware.log_incoming_request - recommend - start
|
39 |
+
2025-01-20T00:44:04.109 ERROR controllers.recommendation_controller - Error 401: Unauthorized: Incorrect password
|
40 |
+
2025-01-20T00:44:04.134 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 11267.24
|
41 |
+
2025-01-20T00:44:04.141 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
42 |
+
2025-01-20T00:44:21.927 INFO utils.middleware.log_incoming_request - recommend - start
|
43 |
+
2025-01-20T00:44:24.535 ERROR controllers.recommendation_controller - Error 401: Unauthorized: Incorrect password
|
44 |
+
2025-01-20T00:44:24.535 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 2607.53
|
45 |
+
2025-01-20T00:44:24.535 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
46 |
+
2025-01-20T00:45:10.763 INFO utils.middleware.log_incoming_request - recommend - start
|
47 |
+
2025-01-20T00:45:15.836 ERROR controllers.recommendation_controller - Error 401: Unauthorized: Incorrect password
|
48 |
+
2025-01-20T00:45:15.861 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 5097.82
|
49 |
+
2025-01-20T00:45:15.871 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
50 |
+
2025-01-20T00:45:22.052 INFO utils.middleware.log_incoming_request - recommend - start
|
51 |
+
2025-01-20T00:45:29.665 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 7612.25
|
52 |
+
2025-01-20T00:45:29.671 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
53 |
+
2025-01-20T00:46:18.791 INFO utils.middleware.log_incoming_request - recommend - start
|
54 |
+
2025-01-20T00:46:41.889 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 23098.92
|
55 |
+
2025-01-20T00:46:41.905 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
56 |
+
2025-01-20T00:46:50.386 INFO utils.middleware.log_incoming_request - recommend - start
|
57 |
+
2025-01-20T00:46:53.681 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 3295.55
|
58 |
+
2025-01-20T00:46:53.697 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
59 |
+
2025-01-20T00:47:04.192 INFO utils.middleware.log_incoming_request - recommend - start
|
60 |
+
2025-01-20T00:47:04.196 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 3.72
|
61 |
+
2025-01-20T00:47:04.196 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
62 |
+
2025-01-20T00:47:09.505 INFO utils.middleware.log_incoming_request - recommend - start
|
63 |
+
2025-01-20T00:47:09.513 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 8.01
|
64 |
+
2025-01-20T00:47:09.519 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
65 |
+
2025-01-20T00:50:05.276 INFO utils.middleware.log_incoming_request - readLogs - start
|
66 |
+
2025-01-20T00:50:05.546 INFO utils.middleware.log_incoming_request - readLogs - end in time (ms): 270.15
|
67 |
+
2025-01-20T00:50:05.556 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
68 |
+
2025-01-20T00:50:10.956 INFO utils.middleware.log_incoming_request - readLogs - start
|
69 |
+
2025-01-20T00:50:10.966 INFO utils.middleware.log_incoming_request - readLogs - end in time (ms): 10.31
|
70 |
+
2025-01-20T00:50:10.980 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
71 |
+
2025-01-20T00:50:13.188 INFO utils.middleware.log_incoming_request - readLogs - start
|
72 |
+
2025-01-20T00:50:13.206 INFO utils.middleware.log_incoming_request - readLogs - end in time (ms): 17.76
|
73 |
+
2025-01-20T00:50:13.212 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
74 |
+
2025-01-20T00:50:34.126 INFO utils.middleware.log_incoming_request - recommend - start
|
75 |
+
2025-01-20T00:50:34.136 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 10.13
|
76 |
+
2025-01-20T00:50:34.143 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
77 |
+
2025-01-20T00:50:39.423 INFO utils.middleware.log_incoming_request - recommend - start
|
78 |
+
2025-01-20T00:50:39.473 INFO utils.middleware.log_incoming_request - recommend - end in time (ms): 49.81
|
79 |
+
2025-01-20T00:50:39.473 INFO utils.middleware.request_cancellation - Task Cancellatation Requested.
|
pickles/16k_apparrel_data
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b22ccca564ec344593b30c8b4657493693e56d80e214ea5b04380eb7d33cae45
|
3 |
+
size 3243088
|
pickles/17k_apparrel_data
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eae9951b598965b93e5042e5fe576a2dd92826e4a4c16de4659c8ae5afe542c2
|
3 |
+
size 14642960
|
pickles/28k_apparel_data
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58ef313a0b016e5914bf1cb9990c502230821585cc702b55f2ea04a1c2cb4f55
|
3 |
+
size 24447034
|
requirement.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi
|
2 |
+
fastapi-router-controller
|
3 |
+
Pillow
|
4 |
+
matplotlib
|
5 |
+
nltk
|
6 |
+
seaborn
|
7 |
+
scikit-learn
|
8 |
+
plotly
|
9 |
+
gensim
|
10 |
+
keras
|
11 |
+
tensorflow
|
12 |
+
python-multipart
|
utils/BagOfWords.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.feature_extraction.text import CountVectorizer
from typing import List, Dict
import os

from utils.config import Config

# Dataset location comes from the [app] section of properties.ini.
dataset_path = Config.read('app', 'dataset')

# Fail fast at import time rather than erroring mid-request.
if not os.path.exists(dataset_path):
    raise FileNotFoundError(f"The dataset file at {dataset_path} was not found.")

data = pd.read_pickle(dataset_path)

# The result dicts below depend on these columns; validate them up front.
required_columns = ['asin', 'title', 'brand', 'medium_image_url']
for col in required_columns:
    if col not in data.columns:
        raise ValueError(f"Missing required column: {col} in the dataset")

# Fit the bag-of-words vocabulary over every product title, once.
title_vectorizer = CountVectorizer()
title_features = title_vectorizer.fit_transform(data['title'])


def bag_of_words_model(query: str, num_results: int) -> List[Dict]:
    """Return the *num_results* products whose titles are closest to *query*.

    Closeness is cosine distance between bag-of-words title vectors.
    """
    # Project the query into the fitted title feature space.
    query_vec = title_vectorizer.transform([query])

    # Distance from the query to every title in the corpus.
    distances = pairwise_distances(title_features, query_vec, metric='cosine')

    # Positions of the num_results nearest titles.
    nearest = np.argsort(distances.flatten())[:num_results]

    return [
        {
            'asin': data['asin'].iloc[pos],
            'brand': data['brand'].iloc[pos],
            'title': data['title'].iloc[pos],
            'url': data['medium_image_url'].iloc[pos],
        }
        for pos in nearest
    ]
|
utils/cnn.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances
from typing import List, Dict
from utils.config import Config
from PIL import Image
import pandas as pd
import tensorflow as tf
import io
import os

# Dataset location comes from the [app] section of properties.ini.
dataset_path = Config.read('app', 'dataset')

# Fail fast at import time rather than erroring mid-request.
if not os.path.exists(dataset_path):
    raise FileNotFoundError(f"The dataset file at {dataset_path} was not found.")

data = pd.read_pickle(dataset_path)

# The result dicts below depend on these columns; validate them up front.
required_columns = ['asin', 'title', 'brand', 'medium_image_url']
for col in required_columns:
    if col not in data.columns:
        raise ValueError(f"Missing required column: {col} in the dataset")

# Pre-computed CNN features for the catalog, plus the ASIN each row maps to.
bottleneck_features_train = np.load(Config.read('app', 'cnnmodel'))
bottleneck_features_train = bottleneck_features_train.astype(np.float64)
asins = list(np.load(Config.read('app', 'cssasins')))

# Cached VGG16 instance: the original re-downloaded/rebuilt the model with
# ImageNet weights on EVERY request, which dominated per-request latency.
_vgg16_model = None


def _get_vgg16_model():
    """Return the shared VGG16 feature extractor, creating it lazily once."""
    global _vgg16_model
    if _vgg16_model is None:
        _vgg16_model = tf.keras.applications.VGG16(
            weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    return _vgg16_model


def extract_features_from_image(image_bytes):
    """Extract a flattened VGG16 feature vector from raw image bytes.

    The image is normalized to RGB so grayscale/RGBA/palette uploads still
    produce the (224, 224, 3) input shape the network requires (the
    original crashed on non-RGB images).
    """
    image = Image.open(io.BytesIO(image_bytes))
    image = image.convert('RGB').resize((224, 224))
    # Scale to [0, 1] and add the batch dimension: (1, 224, 224, 3).
    image_array = np.expand_dims(np.array(image) / 255.0, axis=0)

    features = _get_vgg16_model().predict(image_array)
    return features.flatten()


def get_similar_products_cnn(image_features, num_results: int) -> List[Dict]:
    """Return the *num_results* catalog items closest to *image_features*.

    Closeness is (euclidean) distance between the query feature vector and
    the pre-computed per-product CNN features.
    """
    pairwise_dist = pairwise_distances(
        bottleneck_features_train, image_features.reshape(1, -1))

    # Indices of the closest feature rows.
    indices = np.argsort(pairwise_dist.flatten())[0:num_results]

    results = []
    for idx in indices:
        # Map the feature row back to its product via the ASIN list.
        product_details = data[['asin', 'brand', 'title', 'medium_image_url']].loc[data['asin'] == asins[idx]]
        for _, row in product_details.iterrows():
            results.append({
                'asin': row['asin'],
                'brand': row['brand'],
                'title': row['title'],
                'url': row['medium_image_url'],
            })
    return results
|
70 |
+
|
utils/config/__init__.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
from pathlib import Path
from configparser import ConfigParser

# Resolve properties.ini relative to this package so imports work
# regardless of the process's current working directory.
this_dir = Path(__file__).parent
conf_dir = this_dir / 'properties.ini'

# Environment variables serve as interpolation defaults for the parser.
parser = ConfigParser(os.environ)
parser.read(conf_dir, encoding="utf8")


class Config():
    """Read-only accessor for the application's .ini configuration."""

    @staticmethod
    def read(section, property, default=None):
        """Return *property* from *section*, or *default*.

        The original called ``parser.get(section, property)`` directly,
        which raises NoSectionError/NoOptionError for missing keys and
        made the ``default`` parameter unreachable in that case.
        ``fallback=`` makes missing keys return *default*; the trailing
        ``or default`` preserves the original empty-value fallback.
        """
        return parser.get(section, property, fallback=default) or default
|
utils/config/properties.ini
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[app]
|
2 |
+
name=Apparel Recommendation System
|
3 |
+
env=%(ENV)s
|
4 |
+
dataset=pickles/16k_apparrel_data
|
5 |
+
cnnmodel=models/16k_data_cnn_features.npy
|
6 |
+
cssasins=models/16k_data_cnn_feature_asins.npy
|
7 |
+
|
8 |
+
# we are currently using this model
|
9 |
+
model=ibm-granite/granite-3b-code-base
|
10 |
+
|
11 |
+
[log]
|
12 |
+
level=INFO
|
13 |
+
filename=./logs.log
|
14 |
+
dateformat=%%Y-%%m-%%dT%%H:%%M:%%S
|
15 |
+
format=%%(asctime)s.%%(msecs)03d %%(levelname)5s %%(name)s - %%(message)s
|
utils/logger.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging

from utils.config import Config


class Logger():
    """Factory for file-backed loggers configured from the [log] section."""

    CONFIG_KEY = 'log'

    @staticmethod
    def get_level():
        """Logging level name (e.g. 'INFO') from configuration."""
        return Config.read(Logger.CONFIG_KEY, 'level')

    @staticmethod
    def get_filename():
        """Path of the log file from configuration."""
        return Config.read(Logger.CONFIG_KEY, 'filename')

    @staticmethod
    def get_format():
        """Log record format string from configuration."""
        return Config.read(Logger.CONFIG_KEY, 'format')

    @staticmethod
    def get_date_format():
        """Timestamp format string from configuration."""
        return Config.read(Logger.CONFIG_KEY, 'dateformat')

    @staticmethod
    def get_logger(name):
        """Return the logger *name* with the file handler attached once.

        ``logging.getLogger`` caches and returns the same instance per
        name, so the original — which unconditionally added a new
        FileHandler on every call — duplicated every log line when
        called twice for the same name. Attach the handler only if the
        logger has none yet.
        """
        logger = logging.getLogger(name)
        logger.setLevel(Logger.get_level())  # type: ignore

        if not logger.handlers:
            formatter = logging.Formatter(
                Logger.get_format(),
                Logger.get_date_format())

            file_hdlr = logging.FileHandler(Logger.get_filename())  # type: ignore
            file_hdlr.setFormatter(formatter)
            logger.addHandler(hdlr=file_hdlr)

        return logger
|
utils/middleware/__init__.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Package-level re-exports so callers can write
# ``from utils.middleware import LogIncomingRequest``.
from utils.middleware.log_incoming_request import LogIncomingRequest

__all__ = [
    'LogIncomingRequest',
]
|
utils/middleware/log_incoming_request.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
from typing import List
from fastapi import Request
from fastapi.routing import APIRoute
from starlette.middleware.base import BaseHTTPMiddleware
from utils.logger import Logger

logger = Logger.get_logger(__name__)


class LogIncomingRequest(BaseHTTPMiddleware):
    """Middleware that logs each request's handler name and duration (ms)."""

    def __get_request_handler(_, req: Request):  # type: ignore
        """Resolve the name of the endpoint that will handle *req*.

        The original fell off the end of the loop and returned None when
        no route matched, producing 'None - start' log lines; fall back
        to 'fastapi_core' instead, matching the in-loop fallback.
        """
        routes: List[APIRoute] = req.app.routes
        for route in routes:
            if route.path_regex.match(req.url.path) and req.method in route.methods:
                return route.endpoint.__name__ if hasattr(route.endpoint, '__name__') else 'fastapi_core'
        return 'fastapi_core'

    async def dispatch(self, request: Request, call_next):
        """Log start/end around the downstream handler and time it."""
        func_name = self.__get_request_handler(request)
        # Expose the handler name to downstream code via request state.
        request.state.func_name = func_name

        logger.info('{} - start'.format(func_name))
        start_time = time.time()

        response = await call_next(request)

        process_time = (time.time() - start_time) * 1000
        formatted_process_time = '{0:.2f}'.format(process_time)
        logger.info('{} - end in time (ms): {}'.format(func_name,
                                                       formatted_process_time))
        return response
|
utils/middleware/request_cancellation.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
from utils.logger import Logger

logger = Logger.get_logger(__name__)


class RequestCancellation:
    """
    RequestCancellation middleware handles request canceling
    * In case of API routes where very frequent/expensive requests are made.
    """

    def __init__(self, app):
        # Next ASGI application in the middleware chain.
        self.app = app

    async def __call__(self, scope, receive, send):
        # Only HTTP requests can be client-cancelled; pass every other
        # scope type (lifespan, websocket) straight through untouched.
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        # Buffer the incoming ASGI messages so we can watch the real
        # ``receive`` for a disconnect while the handler consumes the
        # request body from the queue instead.
        queue = asyncio.Queue()

        async def message_poller(sentinel, handler_task):
            nonlocal queue
            while True:
                message = await receive()
                if message["type"] == "http.disconnect":
                    # Client went away: abort the in-flight handler.
                    handler_task.cancel()
                    return sentinel
                await queue.put(message)

        sentinel = object()
        handler_task = asyncio.create_task(self.app(scope, queue.get, send))
        asyncio.create_task(message_poller(sentinel, handler_task))

        try:
            return await handler_task
        except asyncio.CancelledError:
            # Typo fix: the original logged 'Task Cancellatation Requested.'
            logger.info('Task Cancellation Requested.')
|
utils/tfidf.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import pairwise_distances
from typing import List, Dict
from utils.config import Config
import os


# Dataset location comes from the [app] section of properties.ini.
dataset_path = Config.read('app', 'dataset')

# Fail fast at import time rather than erroring mid-request.
if not os.path.exists(dataset_path):
    raise FileNotFoundError(f"The dataset file at {dataset_path} was not found.")

data = pd.read_pickle(dataset_path)

# The result dicts below depend on these columns; validate them up front.
required_columns = ['asin', 'title', 'brand', 'medium_image_url']
for col in required_columns:
    if col not in data.columns:
        raise ValueError(f"Missing required column: {col} in the dataset")

# Fit the TF-IDF vocabulary over every product title, once at import.
tfidf_title_vectorizer = TfidfVectorizer(min_df=0.0)
tfidf_title_features = tfidf_title_vectorizer.fit_transform(data['title'])


def tfidf_model(input_text: str, num_results: int) -> List[Dict]:
    """Return the *num_results* products whose titles are closest to
    *input_text* under TF-IDF feature distance."""
    # Project the query into the fitted TF-IDF feature space.
    query_vec = tfidf_title_vectorizer.transform([input_text])

    # Distance from the query to every title in the corpus.
    distances = pairwise_distances(tfidf_title_features, query_vec)

    # Positions of the num_results nearest titles ...
    nearest = np.argsort(distances.flatten())[:num_results]
    # ... mapped to the DataFrame's own index labels for .loc lookups.
    row_labels = list(data.index[nearest])

    return [
        {
            'asin': data['asin'].loc[label],
            'brand': data['brand'].loc[label],
            'title': data['title'].loc[label],
            'url': data['medium_image_url'].loc[label],
        }
        for label in row_labels
    ]
|