santu24 committed on
Commit
4aa2661
·
verified ·
1 Parent(s): 1e4bacf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +447 -480
app.py CHANGED
@@ -1,480 +1,447 @@
1
- import os
2
- import re
3
- import ast
4
- import json
5
- import requests
6
- import pandas as pd
7
- import sqlite3
8
- import logging
9
-
10
- from flask import (
11
- Flask,
12
- request,
13
- jsonify,
14
- render_template,
15
- redirect,
16
- url_for,
17
- session
18
- )
19
- from flask_session import Session
20
- from dotenv import load_dotenv
21
-
22
- # Load environment variables from a .env file
23
- load_dotenv()
24
-
25
- # Configure Logging
26
- logging.basicConfig(level=logging.INFO)
27
- logger = logging.getLogger(__name__)
28
-
29
- ##############################################################################
30
- # CONFIG
31
- ##############################################################################
32
- CSV_PATH = "All_Categories.csv" # Path to your large CSV
33
- DB_PATH = "products.db" # SQLite database file
34
- TABLE_NAME = "products"
35
-
36
- # Securely load your Gemini API key from environment variables
37
- GEMINI_API_KEY = "AIzaSyCjX_-b9P9Sv5UY8eMLyaj6L_fjoh90EVM" # Ensure you set this in your .env file
38
-
39
- if not GEMINI_API_KEY:
40
- logger.error("Gemini API key not found. Please set GEMINI_API_KEY in your .env file.")
41
- raise ValueError("Gemini API key not found. Please set GEMINI_API_KEY in your .env file.")
42
-
43
- # Replace with the correct model name your account has access to
44
- GEMINI_MODEL_NAME = "gemini-1.5-flash" # If invalid, try "gemini-1.5-pro"
45
- GEMINI_ENDPOINT = f"https://generativelanguage.googleapis.com/v1beta/models/{GEMINI_MODEL_NAME}:generateContent"
46
-
47
- ##############################################################################
48
- # 1) CREATE DB FROM CSV (IF NEEDED)
49
- ##############################################################################
50
- def create_db_from_csv(csv_file, db_file):
51
- if os.path.exists(db_file):
52
- logger.info(f"Database '{db_file}' already exists. Skipping creation.")
53
- return
54
-
55
- logger.info(f"Creating SQLite DB from CSV: {csv_file} -> {db_file}")
56
- df_iter = pd.read_csv(csv_file, chunksize=50000)
57
- conn = sqlite3.connect(db_file)
58
- cur = conn.cursor()
59
- cur.execute(f"DROP TABLE IF EXISTS {TABLE_NAME}")
60
- conn.commit()
61
-
62
- create_sql = f"""
63
- CREATE TABLE {TABLE_NAME} (
64
- id INTEGER PRIMARY KEY AUTOINCREMENT,
65
- name TEXT,
66
- image TEXT,
67
- link TEXT,
68
- ratings REAL,
69
- no_of_ratings INTEGER,
70
- discount_price TEXT,
71
- actual_price TEXT,
72
- search_terms TEXT,
73
- recommended_5 TEXT,
74
- category TEXT
75
- );
76
- """
77
- cur.execute(create_sql)
78
- conn.commit()
79
-
80
- # Create indexes to optimize search performance
81
- cur.execute(f"CREATE INDEX idx_name ON {TABLE_NAME}(name);")
82
- cur.execute(f"CREATE INDEX idx_category ON {TABLE_NAME}(category);")
83
- cur.execute(f"CREATE INDEX idx_discount_price ON {TABLE_NAME}(discount_price);")
84
- conn.commit()
85
-
86
- # Optional: Create Full-Text Search (FTS) table
87
- # Uncomment if you want to use FTS
88
- # cur.execute(f"""
89
- # CREATE VIRTUAL TABLE {TABLE_NAME}_fts USING fts5(name, search_terms, category);
90
- # INSERT INTO {TABLE_NAME}_fts(rowid, name, search_terms, category)
91
- # SELECT id, name, search_terms, category FROM {TABLE_NAME};
92
- # """)
93
- # conn.commit()
94
-
95
- chunk_idx = 0
96
- for chunk in df_iter:
97
- logger.info(f"Processing chunk {chunk_idx}...")
98
- chunk_idx += 1
99
-
100
- # Ensure all required columns are present
101
- required_columns = [
102
- "name","image","link","ratings","no_of_ratings",
103
- "discount_price","actual_price","search_terms","recommended_5","category"
104
- ]
105
- for col in required_columns:
106
- if col not in chunk.columns:
107
- chunk[col] = ""
108
- chunk.fillna("", inplace=True)
109
- records = chunk.to_dict(orient="records")
110
-
111
- insert_sql = f"""
112
- INSERT INTO {TABLE_NAME}
113
- (name, image, link, ratings, no_of_ratings,
114
- discount_price, actual_price, search_terms,
115
- recommended_5, category)
116
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
117
- """
118
- data_to_insert = []
119
- for r in records:
120
- # Clean and prepare data
121
- try:
122
- ratings = float(r["ratings"]) if r["ratings"] else 0.0
123
- except ValueError:
124
- ratings = 0.0
125
- try:
126
- no_of_ratings = int(r["no_of_ratings"]) if r["no_of_ratings"] else 0
127
- except ValueError:
128
- no_of_ratings = 0
129
- row_tuple = (
130
- str(r["name"]),
131
- str(r["image"]),
132
- str(r["link"]),
133
- ratings,
134
- no_of_ratings,
135
- str(r["discount_price"]),
136
- str(r["actual_price"]),
137
- str(r["search_terms"]),
138
- str(r["recommended_5"]),
139
- str(r["category"])
140
- )
141
- data_to_insert.append(row_tuple)
142
-
143
- cur.executemany(insert_sql, data_to_insert)
144
- conn.commit()
145
-
146
- conn.close()
147
- logger.info("Database creation complete.")
148
-
149
- ##############################################################################
150
- # FLASK APP
151
- ##############################################################################
152
- app = Flask(__name__)
153
- app.secret_key = os.getenv("FLASK_SECRET_KEY", "YOUR_SECURE_RANDOM_KEY") # Replace with a secure random key
154
- app.config["SESSION_TYPE"] = "filesystem"
155
- Session(app)
156
-
157
- ##############################################################################
158
- # 2) NORMAL SEARCH & RECOMMENDATION
159
- ##############################################################################
160
- @app.route("/")
161
- def index():
162
- """Home page with a search bar."""
163
- return render_template("index.html")
164
-
165
- @app.route("/autocomplete")
166
- def autocomplete():
167
- """Return (id, name) JSON for substring search in 'name'."""
168
- q = request.args.get("q", "").strip()
169
- if not q:
170
- return jsonify([])
171
-
172
- conn = sqlite3.connect(DB_PATH)
173
- cur = conn.cursor()
174
- sql = f"""
175
- SELECT id, name
176
- FROM {TABLE_NAME}
177
- WHERE LOWER(name) LIKE LOWER(?)
178
- LIMIT 10
179
- """
180
- wildcard = f"%{q}%"
181
- rows = cur.execute(sql, (wildcard,)).fetchall()
182
- conn.close()
183
-
184
- results = [{"id": r[0], "name": r[1]} for r in rows]
185
- return jsonify(results)
186
-
187
- @app.route("/product/<int:item_id>")
188
- def show_product(item_id):
189
- """Show product detail + top-5 recommended items from recommended_5."""
190
- conn = sqlite3.connect(DB_PATH)
191
- cur = conn.cursor()
192
-
193
- sql = f"SELECT * FROM {TABLE_NAME} WHERE id=?"
194
- row = cur.execute(sql, (item_id,)).fetchone()
195
- if not row:
196
- conn.close()
197
- return "<h2>Product not found</h2>", 404
198
-
199
- product = {
200
- "id": row[0],
201
- "name": row[1],
202
- "image": row[2],
203
- "link": row[3],
204
- "ratings": row[4],
205
- "no_of_ratings": row[5],
206
- "discount_price": row[6],
207
- "actual_price": row[7],
208
- "search_terms": row[8],
209
- "recommended_5": row[9],
210
- "category": row[10]
211
- }
212
-
213
- # Parse recommended_5
214
- try:
215
- rec_list = ast.literal_eval(product["recommended_5"])
216
- if not isinstance(rec_list, list):
217
- rec_list = []
218
- except:
219
- rec_list = []
220
-
221
- recommended_details = []
222
- for rec_name in rec_list[:5]:
223
- sql_rec = f"SELECT * FROM {TABLE_NAME} WHERE name LIKE ? LIMIT 1"
224
- rec_row = cur.execute(sql_rec, (f"%{rec_name}%",)).fetchone()
225
- if rec_row:
226
- recommended_details.append({
227
- "id": rec_row[0],
228
- "name": rec_row[1],
229
- "image": rec_row[2],
230
- "link": rec_row[3],
231
- "discount_price": rec_row[6]
232
- })
233
-
234
- conn.close()
235
- return render_template("product.html",
236
- product=product,
237
- recommended=recommended_details)
238
-
239
- ##############################################################################
240
- # 3) RAG-LIKE CHAT WITH IN-DEPTH PARSING AND CUSTOM PROMPT
241
- ##############################################################################
242
- @app.route("/rag")
243
- def rag_index():
244
- """RAG Chat page storing conversation in session['rag_chat']. """
245
- if "rag_chat" not in session:
246
- session["rag_chat"] = []
247
- return render_template("rag.html", chat_history=session["rag_chat"])
248
-
249
- @app.route("/rag/query", methods=["POST"])
250
- def rag_query():
251
- """
252
- Process user input with an in-depth approach:
253
- - Dynamically extract brands, product types, and price limits from the query.
254
- - Perform precise DB filtering based on extracted parameters.
255
- - Construct a structured prompt for Gemini using the filtered results.
256
- - Parse Gemini's response and update the conversation history.
257
- """
258
- if "rag_chat" not in session:
259
- session["rag_chat"] = []
260
-
261
- user_input = request.form.get("rag_input", "").strip()
262
- if not user_input:
263
- return redirect(url_for("rag_index"))
264
-
265
- # 1) Add user message
266
- session["rag_chat"].append(("user", user_input))
267
-
268
- # 2) Extract brands, product types, and price from user query
269
- brand_keyword, product_type, price_val = extract_query_parameters(user_input)
270
-
271
- # 3) Perform DB filtering based on extracted parameters
272
- matched_items = filter_database(brand_keyword, product_type, price_val)
273
-
274
- # 4) Construct DB context for the prompt
275
- db_context = build_db_context(matched_items, brand_keyword, product_type, price_val)
276
-
277
- # 5) Construct the prompt with conversation history and DB context
278
- conversation_text = construct_prompt(session["rag_chat"], db_context)
279
-
280
- # 6) Call Gemini's generateContent
281
- gemini_response = gemini_generate_content(
282
- api_key=GEMINI_API_KEY,
283
- conversation_text=conversation_text
284
- )
285
-
286
- # 7) Add assistant message
287
- session["rag_chat"].append(("assistant", gemini_response))
288
- return redirect(url_for("rag_index"))
289
-
290
- def extract_query_parameters(user_query):
291
- """
292
- Extract brand, product type, and price from the user's query dynamically.
293
- """
294
- user_lower = user_query.lower()
295
-
296
- # Extract price
297
- price = None
298
- # Look for patterns like "under 5000", "below 25k", etc.
299
- price_match = re.search(r'(under|below)\s+₹?(\d+[kK]?)', user_lower)
300
- if price_match:
301
- price_str = price_match.group(2)
302
- if price_str.lower().endswith('k'):
303
- price = int(price_str[:-1]) * 1000
304
- else:
305
- price = int(price_str)
306
-
307
- # Dynamically extract brands and product types from the database
308
- conn = sqlite3.connect(DB_PATH)
309
- cur = conn.cursor()
310
-
311
- # Fetch distinct categories and search_terms to build dynamic keyword lists
312
- cur.execute(f"SELECT DISTINCT category FROM {TABLE_NAME}")
313
- categories = [row[0].lower() for row in cur.fetchall()]
314
-
315
- cur.execute(f"SELECT DISTINCT search_terms FROM {TABLE_NAME}")
316
- search_terms = [row[0].lower() for row in cur.fetchall()]
317
-
318
- conn.close()
319
-
320
- # Initialize variables
321
- brand = None
322
- product_type = None
323
-
324
- # Check for product types in user query
325
- for category in categories:
326
- if category in user_lower:
327
- product_type = category
328
- break
329
-
330
- # If not found in category, check search_terms
331
- if not product_type:
332
- for term in search_terms:
333
- if term in user_lower:
334
- product_type = term
335
- break
336
-
337
- # For brand, attempt to extract from the search_terms by splitting
338
- possible_brands = set()
339
- for term in search_terms:
340
- words = term.split()
341
- possible_brands.update(words)
342
-
343
- possible_brands = list(possible_brands)
344
-
345
- for b in possible_brands:
346
- if b in user_lower:
347
- brand = b
348
- break
349
-
350
- return brand, product_type, price
351
-
352
- def filter_database(brand, product_type, price):
353
- """
354
- Filter the database based on brand, product type, and price.
355
- """
356
- conn = sqlite3.connect(DB_PATH)
357
- cur = conn.cursor()
358
-
359
- # Build dynamic SQL query
360
- sql = f"SELECT id, name, discount_price, recommended_5 FROM {TABLE_NAME} WHERE 1=1"
361
- params = []
362
-
363
- if brand:
364
- sql += " AND LOWER(name) LIKE ?"
365
- params.append(f"%{brand}%")
366
- if product_type:
367
- sql += " AND LOWER(category) LIKE ?"
368
- params.append(f"%{product_type}%")
369
- if price:
370
- # Clean the discount_price field to extract numerical value
371
- # Assuming discount_price is stored as a string like "₹1,299"
372
- sql += " AND CAST(REPLACE(REPLACE(discount_price, '₹', ''), ',', '') AS INTEGER) <= ?"
373
- params.append(price)
374
-
375
- # Limit to 5000 for performance; adjust as needed
376
- sql += " LIMIT 5000"
377
-
378
- rows = cur.execute(sql, tuple(params)).fetchall()
379
- conn.close()
380
-
381
- return rows
382
-
383
- def build_db_context(matched_items, brand, product_type, price):
384
- """
385
- Build a structured context string from matched database items.
386
- """
387
- db_context = ""
388
- if matched_items:
389
- db_context += f"Found {len(matched_items)} items"
390
- if price:
391
- db_context += f" under ₹{price}"
392
- if brand or product_type:
393
- db_context += " matching your criteria"
394
- db_context += ":\n"
395
-
396
- # List up to 10 items for context
397
- for item in matched_items[:10]:
398
- item_name = item[1]
399
- item_price = item[2]
400
- db_context += f"- {item_name} at ₹{item_price}\n"
401
- else:
402
- db_context += "No matching items found in the database.\n"
403
-
404
- return db_context
405
-
406
- def construct_prompt(chat_history, db_context):
407
- """
408
- Construct the prompt to send to Gemini, including conversation history and DB context.
409
- """
410
- prompt = (
411
- "You are an intelligent assistant that provides product recommendations based on the user's query and the available database.\n\n"
412
- "Conversation so far:\n"
413
- )
414
- for speaker, message in chat_history:
415
- prompt += f"{speaker.capitalize()}: {message}\n"
416
-
417
- prompt += f"\nDatabase Context:\n{db_context}\n"
418
-
419
- prompt += "Based on the above information, provide a helpful and concise answer to the user's query."
420
-
421
- return prompt
422
-
423
- def gemini_generate_content(api_key, conversation_text):
424
- """
425
- Call the Gemini API's generateContent endpoint with the constructed prompt.
426
- """
427
- url = f"{GEMINI_ENDPOINT}?key={api_key}"
428
-
429
- payload = {
430
- "contents": [
431
- {
432
- "parts": [{"text": conversation_text}]
433
- }
434
- ]
435
- }
436
-
437
- headers = {"Content-Type": "application/json"}
438
- try:
439
- resp = requests.post(url, headers=headers, data=json.dumps(payload))
440
- except Exception as e:
441
- logger.error(f"Error during Gemini API request: {e}")
442
- return f"[Gemini Error] Failed to connect to Gemini API: {e}"
443
-
444
- try:
445
- data = resp.json()
446
- except Exception as e:
447
- logger.error(f"Invalid JSON response from Gemini API: {e}")
448
- return f"[Gemini Error] Invalid JSON response: {e}"
449
-
450
- if resp.status_code != 200:
451
- logger.error(f"Gemini API returned error {resp.status_code}: {data}")
452
- return f"[Gemini Error {resp.status_code}] {json.dumps(data, indent=2)}"
453
-
454
- # Parse the "candidates" structure
455
- candidates = data.get("candidates", [])
456
- if not candidates:
457
- logger.error(f"No candidates received from Gemini API: {data}")
458
- return f"No candidates received. Debug JSON: {json.dumps(data, indent=2)}"
459
-
460
- first_candidate = candidates[0]
461
- content = first_candidate.get("content", {})
462
- parts = content.get("parts", [])
463
- if not parts:
464
- logger.error(f"No 'parts' found in candidate content: {data}")
465
- return f"No 'parts' found in candidate content. Debug JSON: {json.dumps(data, indent=2)}"
466
-
467
- assistant_reply = parts[0].get("text", "(No text found in the response)")
468
- logger.info(f"Gemini Assistant Reply: {assistant_reply}")
469
- return assistant_reply
470
-
471
- ##############################################################################
472
- # MAIN
473
- ##############################################################################
474
- def main():
475
- create_db_from_csv(CSV_PATH, DB_PATH)
476
- logger.info("Starting Flask server at http://127.0.0.1:5000")
477
- app.run(debug=True)
478
-
479
- if __name__ == "__main__":
480
- main()
 
1
import ast
import json
import logging
import os
import re
import sqlite3
from contextlib import closing

import pandas as pd
import requests
from dotenv import load_dotenv
from flask import (
    Flask,
    jsonify,
    redirect,
    render_template,
    request,
    session,
    url_for,
)
from flask_session import Session

# Load environment variables from a .env file so os.getenv() below can see them.
load_dotenv()

# Configure logging once for the whole module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

CSV_PATH = "All_Categories.csv"  # Path to the large product CSV
DB_PATH = "products.db"  # SQLite database file built from the CSV
TABLE_NAME = "products"

# Read the Gemini API key from the environment. The variable *name* must be
# passed as a string: passing the bare identifier GEMINI_API_KEY would raise
# NameError here, because the name is not bound until this assignment finishes.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not GEMINI_API_KEY:
    logger.error("Gemini API key not found. Please set GEMINI_API_KEY in your .env file.")
    raise ValueError("Gemini API key not found. Please set GEMINI_API_KEY in your .env file.")

# Replace with the correct model name your account has access to.
GEMINI_MODEL_NAME = "gemini-1.5-flash"  # If invalid, try "gemini-1.5-pro"
GEMINI_ENDPOINT = f"https://generativelanguage.googleapis.com/v1beta/models/{GEMINI_MODEL_NAME}:generateContent"


def _parse_number(value, cast, default):
    """Convert a raw CSV cell to a number, returning *default* when it is not numeric.

    Thousands separators ("1,234") are removed before parsing so such counts
    are not silently turned into the default.
    """
    if isinstance(value, str):
        value = value.replace(",", "").strip()
    if value is None or value == "":
        return default
    try:
        return cast(float(value))
    except (TypeError, ValueError, OverflowError):
        # Non-numeric text, NaN or infinity.
        return default


def create_db_from_csv(csv_file, db_file):
    """Build the SQLite product database from the CSV, unless it already exists.

    Args:
        csv_file: Path of the source CSV; read in chunks of 50,000 rows.
        db_file: Path of the SQLite file to create.

    The function returns immediately when ``db_file`` already exists. If the
    build fails part-way, the partially written file is removed so the
    "already exists" check does not skip the rebuild on the next start; the
    original exception is re-raised.
    """
    if os.path.exists(db_file):
        logger.info(f"Database '{db_file}' already exists. Skipping creation.")
        return

    logger.info(f"Creating SQLite DB from CSV: {csv_file} -> {db_file}")

    required_columns = [
        "name", "image", "link", "ratings", "no_of_ratings",
        "discount_price", "actual_price", "search_terms", "recommended_5", "category",
    ]
    create_sql = f"""
        CREATE TABLE {TABLE_NAME} (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT,
            image TEXT,
            link TEXT,
            ratings REAL,
            no_of_ratings INTEGER,
            discount_price TEXT,
            actual_price TEXT,
            search_terms TEXT,
            recommended_5 TEXT,
            category TEXT
        );
    """
    insert_sql = f"""
        INSERT INTO {TABLE_NAME}
            (name, image, link, ratings, no_of_ratings,
             discount_price, actual_price, search_terms,
             recommended_5, category)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """

    try:
        with closing(sqlite3.connect(db_file)) as conn:
            cur = conn.cursor()
            cur.execute(f"DROP TABLE IF EXISTS {TABLE_NAME}")
            cur.execute(create_sql)
            conn.commit()

            for chunk_idx, chunk in enumerate(pd.read_csv(csv_file, chunksize=50000)):
                logger.info(f"Processing chunk {chunk_idx}...")

                # Ensure all required columns are present.
                for col in required_columns:
                    if col not in chunk.columns:
                        chunk[col] = ""
                # Replace missing cells with "" on an object-typed copy so
                # numeric columns are not forced to hold strings in place.
                chunk = chunk.astype(object).where(chunk.notna(), "")

                data_to_insert = [
                    (
                        str(r["name"]),
                        str(r["image"]),
                        str(r["link"]),
                        _parse_number(r["ratings"], float, 0.0),
                        _parse_number(r["no_of_ratings"], int, 0),
                        str(r["discount_price"]),
                        str(r["actual_price"]),
                        str(r["search_terms"]),
                        str(r["recommended_5"]),
                        str(r["category"]),
                    )
                    for r in chunk.to_dict(orient="records")
                ]
                cur.executemany(insert_sql, data_to_insert)
                conn.commit()

            # Build the indexes after the bulk load so each insert does not
            # also have to maintain three indexes.
            cur.execute(f"CREATE INDEX idx_name ON {TABLE_NAME}(name);")
            cur.execute(f"CREATE INDEX idx_category ON {TABLE_NAME}(category);")
            cur.execute(f"CREATE INDEX idx_discount_price ON {TABLE_NAME}(discount_price);")
            conn.commit()
    except Exception:
        # The file did not exist when we started, so whatever is there now is
        # an incomplete build created by this call.
        if os.path.exists(db_file):
            os.remove(db_file)
        raise

    logger.info("Database creation complete.")


app = Flask(__name__)

# The secret key signs the session cookie. Falling back to a fixed, publicly
# known string would let anyone forge sessions, so when FLASK_SECRET_KEY is
# not configured a random per-process key is generated instead.
_secret_key = os.getenv("FLASK_SECRET_KEY")
if not _secret_key:
    logger.warning(
        "FLASK_SECRET_KEY is not set; using a random key. "
        "Existing sessions will be invalidated on every restart."
    )
    _secret_key = os.urandom(32).hex()
app.secret_key = _secret_key

# Keep session data on the server's filesystem rather than in the cookie.
app.config["SESSION_TYPE"] = "filesystem"
Session(app)


@app.route("/")
def index():
    """Render the landing page that hosts the product search bar."""
    landing_template = "index.html"
    return render_template(landing_template)


@app.route("/autocomplete")
def autocomplete():
    """Return up to 10 ``{"id", "name"}`` matches for a substring of the product name.

    The search text comes from the ``q`` query parameter. An empty or missing
    ``q`` yields an empty JSON list. Matching is case-insensitive.
    """
    q = request.args.get("q", "").strip()
    if not q:
        return jsonify([])

    # Escape LIKE metacharacters so a "%" or "_" typed by the user is matched
    # literally instead of acting as a wildcard.
    escaped = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    sql = f"""
        SELECT id, name
        FROM {TABLE_NAME}
        WHERE LOWER(name) LIKE LOWER(?) ESCAPE '\\'
        LIMIT 10
    """

    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        rows = conn.execute(sql, (f"%{escaped}%",)).fetchall()

    return jsonify([{"id": row_id, "name": name} for row_id, name in rows])


@app.route("/product/<int:item_id>")
def show_product(item_id):
    """Render a product page together with up to five recommended products.

    Args:
        item_id: Primary key of the product row.

    Returns:
        The rendered ``product.html`` page, or a 404 response when no row has
        that id.

    The ``recommended_5`` column is expected to hold the text of a Python
    list of product names; anything else is treated as "no recommendations".
    Each name is resolved to the first product whose name contains it.
    """
    columns = (
        "id", "name", "image", "link", "ratings", "no_of_ratings",
        "discount_price", "actual_price", "search_terms", "recommended_5", "category",
    )
    # Name the columns explicitly so the mapping below does not depend on the
    # physical column order of the table.
    product_sql = f"SELECT {', '.join(columns)} FROM {TABLE_NAME} WHERE id=?"
    rec_sql = (
        f"SELECT id, name, image, link, discount_price "
        f"FROM {TABLE_NAME} WHERE name LIKE ? LIMIT 1"
    )

    with closing(sqlite3.connect(DB_PATH)) as conn:
        cur = conn.cursor()

        row = cur.execute(product_sql, (item_id,)).fetchone()
        if not row:
            return "<h2>Product not found</h2>", 404
        product = dict(zip(columns, row))

        # literal_eval only evaluates literals, so it is safe on stored text;
        # malformed or empty text simply means there are no recommendations.
        try:
            rec_list = ast.literal_eval(product["recommended_5"])
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            rec_list = []
        if not isinstance(rec_list, list):
            rec_list = []

        recommended_details = []
        for rec_name in rec_list[:5]:
            rec_row = cur.execute(rec_sql, (f"%{rec_name}%",)).fetchone()
            if rec_row:
                rec_id, rec_title, rec_image, rec_link, rec_price = rec_row
                recommended_details.append({
                    "id": rec_id,
                    "name": rec_title,
                    "image": rec_image,
                    "link": rec_link,
                    "discount_price": rec_price,
                })

    return render_template(
        "product.html",
        product=product,
        recommended=recommended_details,
    )


@app.route("/rag")
def rag_index():
    """Show the RAG chat page with the conversation kept in ``session['rag_chat']``."""
    # Start an empty conversation the first time this session opens the page.
    if "rag_chat" not in session:
        session["rag_chat"] = []
    history = session["rag_chat"]
    return render_template("rag.html", chat_history=history)


@app.route("/rag/query", methods=["POST"])
def rag_query():
    """Handle one chat turn and redirect back to the chat page.

    Steps:
      1. Read the ``rag_input`` form field; blank input just redirects.
      2. Extract brand, product type and price limit from the text.
      3. Filter the product database with those parameters.
      4. Build a prompt from the conversation so far plus the matching rows.
      5. Send the prompt to Gemini and append its reply to the conversation.
    """
    if "rag_chat" not in session:
        session["rag_chat"] = []

    user_input = request.form.get("rag_input", "").strip()
    if not user_input:
        return redirect(url_for("rag_index"))

    session["rag_chat"].append(("user", user_input))

    brand_keyword, product_type, price_val = extract_query_parameters(user_input)
    matched_items = filter_database(brand_keyword, product_type, price_val)
    db_context = build_db_context(matched_items, brand_keyword, product_type, price_val)
    conversation_text = construct_prompt(session["rag_chat"], db_context)

    gemini_response = gemini_generate_content(
        api_key=GEMINI_API_KEY,
        conversation_text=conversation_text,
    )

    session["rag_chat"].append(("assistant", gemini_response))
    # Appending to the stored list mutates it in place, which the session
    # object cannot detect; mark it modified so the new turns are persisted.
    session.modified = True
    return redirect(url_for("rag_index"))


def extract_query_parameters(user_query):
    """Extract a brand keyword, product type and price ceiling from a chat query.

    Args:
        user_query: Free-text message typed by the user.

    Returns:
        A ``(brand, product_type, price)`` tuple; each element is ``None``
        when nothing was recognised. ``brand`` and ``product_type`` are
        lower-case strings, ``price`` is an ``int``.

    Heuristics:
        * price: the number after "under"/"below", with optional "₹",
          thousands separators and a "k" suffix ("below 25k" -> 25000).
        * product type: the first category, else the first search-terms
          value, that occurs in the query.
        * brand: the first whole word of the query that also appears in any
          search-terms value and is not part of the matched product type.
    """
    user_lower = user_query.lower()

    # Price ceiling, e.g. "under 5000", "below 25k", "under ₹1,299".
    price = None
    price_match = re.search(r"(under|below)\s+₹?\s*(\d[\d,]*)(k?)", user_lower)
    if price_match:
        digits = price_match.group(2).replace(",", "")
        price = int(digits) * (1000 if price_match.group(3) else 1)

    def _distinct_lower(cursor, column):
        # Skip NULL and blank values: an empty string is a substring of every
        # query and would otherwise "match" first and hide the real matches.
        rows = cursor.execute(f"SELECT DISTINCT {column} FROM {TABLE_NAME}")
        return [
            value.strip().lower()
            for (value,) in rows
            if isinstance(value, str) and value.strip()
        ]

    # The keyword lists are read from the database on every call.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        cur = conn.cursor()
        categories = _distinct_lower(cur, "category")
        search_terms = _distinct_lower(cur, "search_terms")

    # Product type: category first, then fall back to search terms.
    product_type = next((c for c in categories if c in user_lower), None)
    if not product_type:
        product_type = next((t for t in search_terms if t in user_lower), None)

    # Brand: compare whole words, and walk the query in order, so the result
    # is deterministic and short fragments such as "a" cannot match inside
    # unrelated words.
    vocabulary = {word for term in search_terms for word in re.findall(r"\w+", term)}
    type_words = set(re.findall(r"\w+", product_type or ""))
    brand = next(
        (
            word
            for word in re.findall(r"\w+", user_lower)
            if word in vocabulary and word not in type_words and not word.isdigit()
        ),
        None,
    )

    return brand, product_type, price


def filter_database(brand, product_type, price):
    """Return product rows matching the optional brand, product type and price limit.

    Args:
        brand: Text that must occur in the product name, or a falsy value to
            skip this filter.
        product_type: Text that must occur in the category, or falsy to skip.
        price: Maximum discount price as a number, or falsy to skip.

    Returns:
        A list of ``(id, name, discount_price, recommended_5)`` tuples,
        capped at 5000 rows.
    """
    def _like_pattern(text):
        # Escape LIKE metacharacters so the text is matched literally.
        escaped = text.lower().replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        return f"%{escaped}%"

    conditions = ["1=1"]
    params = []

    if brand:
        conditions.append("LOWER(name) LIKE ? ESCAPE '\\'")
        params.append(_like_pattern(brand))
    if product_type:
        conditions.append("LOWER(category) LIKE ? ESCAPE '\\'")
        params.append(_like_pattern(product_type))
    if price:
        # discount_price is stored as text such as "₹1,299"; strip the
        # currency sign and separators before comparing numerically.
        numeric_price = "REPLACE(REPLACE(discount_price, '₹', ''), ',', '')"
        # An empty price would CAST to 0 and pass every limit, so rows
        # without a price are excluded from price-limited searches.
        conditions.append(f"TRIM({numeric_price}) != ''")
        conditions.append(f"CAST({numeric_price} AS INTEGER) <= ?")
        params.append(price)

    # Limit to 5000 for performance; adjust as needed.
    sql = (
        f"SELECT id, name, discount_price, recommended_5 FROM {TABLE_NAME} "
        f"WHERE {' AND '.join(conditions)} LIMIT 5000"
    )

    with closing(sqlite3.connect(DB_PATH)) as conn:
        return conn.execute(sql, tuple(params)).fetchall()


def build_db_context(matched_items, brand, product_type, price):
    """Summarise the matched rows as text for the "Database Context" part of the prompt.

    Args:
        matched_items: Sequence of rows whose index 1 is the product name and
            index 2 the discount price text.
        brand: Brand filter that was applied, if any.
        product_type: Product-type filter that was applied, if any.
        price: Price ceiling that was applied, if any.

    Returns:
        A newline-terminated string: a header line with the match count
        followed by at most ten "- name at ₹price" lines, or a single
        "no matches" line when ``matched_items`` is empty.
    """
    if not matched_items:
        return "No matching items found in the database.\n"

    header = f"Found {len(matched_items)} items"
    if price:
        header += f" under ₹{price}"
    if brand or product_type:
        header += " matching your criteria"
    lines = [header + ":"]

    # List up to 10 items for context.
    for item in matched_items[:10]:
        # Stored prices may already start with "₹"; drop it so the sign is
        # not printed twice.
        price_text = str(item[2]).strip().lstrip("₹").strip()
        lines.append(f"- {item[1]} at ₹{price_text}")

    return "\n".join(lines) + "\n"


def construct_prompt(chat_history, db_context):
    """Assemble the text prompt sent to Gemini.

    Args:
        chat_history: Iterable of ``(speaker, message)`` pairs, oldest first.
        db_context: Text describing the matching database rows.

    Returns:
        The instruction line, one "Speaker: message" line per turn, the
        database context, and the closing instruction, as a single string.
    """
    intro = (
        "You are an intelligent assistant that provides product recommendations based on the user's query and the available database.\n\n"
        "Conversation so far:\n"
    )
    # Join once instead of growing the string turn by turn.
    turns = "".join(
        f"{speaker.capitalize()}: {message}\n" for speaker, message in chat_history
    )
    closing_instruction = (
        "Based on the above information, provide a helpful and concise answer to the user's query."
    )
    return f"{intro}{turns}\nDatabase Context:\n{db_context}\n{closing_instruction}"


def gemini_generate_content(api_key, conversation_text):
    """Send the prompt to Gemini's generateContent endpoint and return the reply text.

    Args:
        api_key: Gemini API key.
        conversation_text: Full prompt text.

    Returns:
        The text of the first part of the first candidate. On any failure
        (network error, non-JSON body, non-200 status, missing candidates or
        parts) a human-readable error string is returned instead; this
        function does not raise for those cases.
    """
    payload = {
        "contents": [
            {"parts": [{"text": conversation_text}]}
        ]
    }
    # Send the key in a header rather than the URL query string, so it does
    # not end up in exception messages (which are returned to the caller) or
    # in URL logs.
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": api_key,
    }

    try:
        # Without a timeout a stalled connection would block the request
        # handler indefinitely.
        resp = requests.post(
            GEMINI_ENDPOINT,
            headers=headers,
            data=json.dumps(payload),
            timeout=30,
        )
    except requests.RequestException as e:
        logger.error(f"Error during Gemini API request: {e}")
        return f"[Gemini Error] Failed to connect to Gemini API: {e}"

    try:
        data = resp.json()
    except ValueError as e:
        logger.error(f"Invalid JSON response from Gemini API: {e}")
        return f"[Gemini Error] Invalid JSON response: {e}"

    if resp.status_code != 200:
        logger.error(f"Gemini API returned error {resp.status_code}: {data}")
        return f"[Gemini Error {resp.status_code}] {json.dumps(data, indent=2)}"

    # Parse the "candidates" structure.
    candidates = data.get("candidates", [])
    if not candidates:
        logger.error(f"No candidates received from Gemini API: {data}")
        return f"No candidates received. Debug JSON: {json.dumps(data, indent=2)}"

    parts = candidates[0].get("content", {}).get("parts", [])
    if not parts:
        logger.error(f"No 'parts' found in candidate content: {data}")
        return f"No 'parts' found in candidate content. Debug JSON: {json.dumps(data, indent=2)}"

    assistant_reply = parts[0].get("text", "(No text found in the response)")
    logger.info(f"Gemini Assistant Reply: {assistant_reply}")
    return assistant_reply


def main():
    """Create the product database if needed, then start the Flask development server.

    Debug mode is off unless the ``FLASK_DEBUG`` environment variable is set
    to 1/true/yes/on: the interactive debugger allows arbitrary code
    execution and must not be enabled by default.
    """
    create_db_from_csv(CSV_PATH, DB_PATH)
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    logger.info("Starting Flask server at http://127.0.0.1:5000")
    app.run(debug=debug_enabled)


if __name__ == "__main__":
    main()