# categorize_fashion_data / extract_insights.py
# Omkar008's picture
# Update extract_insights.py
# 5401162 verified
from openai import Client
from supabase import create_client
import os
import json
# Initialize the Supabase client from environment configuration.
# NOTE: os.getenv returns None when a variable is unset, so despite the `str`
# annotations `url` and `key` may be None at this point.
url: str = os.getenv('SUPABASE_URL')
key: str = os.getenv('SUPABASE_KEY')
supabase = create_client(url, key)
# OpenAI client used for the chat-completion call in this module; the API key
# and organization id are also read from the environment.
client = Client(api_key=os.getenv('OPENAI_API_KEY'),organization=os.getenv('ORG_ID'))
def combine_json_arrays(items, values):
    """Pair line-item descriptions with their amounts, position by position.

    Args:
        items: Sequence of entity dicts; the description is read from each
            dict's ``"mention_text"`` key. ``None`` is treated as empty.
        values: Sequence of entity dicts; the amount is read from
            ``value["normalizedValue"]["text"]``. ``None`` is treated as
            empty.

    Returns:
        A list of ``{"item_description": ..., "item_value": ...}`` dicts, one
        per position of the longer input. A field is ``None`` when the other
        sequence is longer, when the entry is not a dict, or when the
        expected key is missing.
    """
    from itertools import zip_longest

    def _description(item):
        # Non-dict entries (including the zip_longest filler) have no text.
        return item.get("mention_text") if isinstance(item, dict) else None

    def _amount(value):
        if not isinstance(value, dict):
            return None
        normalized = value.get("normalizedValue")
        # Guard against a missing, empty or non-dict "normalizedValue".
        return normalized.get("text") if isinstance(normalized, dict) else None

    # zip_longest pads the shorter sequence with None, which replaces the two
    # hand-written "remaining items" / "remaining values" loops.
    return [
        {"item_description": _description(item), "item_value": _amount(value)}
        for item, value in zip_longest(items or [], values or [])
    ]
# Example reply shape appended to the end of the prompt.
# NOTE(review): this example is not itself valid JSON (objects are nested
# directly inside an object without keys) and it spells the subcategory
# "T Shirts" while the category list says "T-Shirts". The text is left
# untouched here because changing it changes what the model is asked for.
_CATEGORIZATION_OUTPUT_EXAMPLE = """
{
{
"item_description":"HAND CHARM NECKLACE",
"metadata":{
"Tops": [],
"Bottoms": [],
"Footwear": [],
"Accessories": ["Jewelry"],
"Activewear": [],
"Others": []
}
},
{
"item_description":"Jersey Top Slim fit",
"metadata":{
"Tops": ["T Shirts"],
"Bottoms": [],
"Footwear": [],
"Accessories": [],
"Activewear": [],
"Others": []
}
}
}
"""

# Task description and category list that open the prompt.
_CATEGORIZATION_INSTRUCTIONS = """ You will act as a fashion assistant where you will be provided with a JSON array of fashion items, each containing an "item_description" and an "item_value". Your task is to categorize strictly categorize each item into predefined fashion categories and subcategories:
- Tops:T-Shirts, Shirts, Blouses, Tank Tops, Sweaters ,Coats, Jackets, Raincoats, Blazers, Hoodies, Sweatshirts
- Bottoms: Pants, Jeans, Shorts, Skirts
- Footwear: Sneakers, Sandals, Heels , Socks
- Accessories:Belts, Hats, Bags, Scarves, Jewelry, Watches
- Activewear: Sports Bras, Yoga Pants, Athletic Shorts
- Classify all the other categories as Others .
You will be given an input json as follows : \n"""

# Sentence placed between the input data and the example output.
_CATEGORIZATION_OUTPUT_REQUEST = "\n Based on the above data you have to strictly generate output json and return only and only the output json as given :\n"


def prompt_for_categorization(data):
    """Build the categorization prompt for a set of fashion line items.

    Args:
        data: The line items to categorize. A ``str`` is embedded verbatim;
            any other value (for example a list of dicts) is serialized with
            ``json.dumps`` first, because the prompt is assembled by string
            concatenation and a non-string would raise ``TypeError``.

    Returns:
        The full prompt: instructions and category list, the input data, the
        output request sentence, and the example output.

    Raises:
        TypeError: If ``data`` is neither a string nor JSON-serializable.
    """
    if not isinstance(data, str):
        data = json.dumps(data, ensure_ascii=False)
    return "".join(
        (
            _CATEGORIZATION_INSTRUCTIONS,
            data,
            _CATEGORIZATION_OUTPUT_REQUEST,
            _CATEGORIZATION_OUTPUT_EXAMPLE,
        )
    )
async def extract_fashion_categories(user_id: str, email: str, message_id: str) -> None:
    """Categorize one message's line items with GPT and store the result.

    Reads ``line_item_description`` from the ``document_ai_entities`` row
    matching ``user_id``, ``email`` and ``message_id``, asks the chat model
    to categorize the items, and writes the parsed JSON reply to the
    ``categorised_data`` column of the same row.

    Does nothing (beyond logging) when no row matches or when the row has no
    line items; previously these cases raised ``IndexError`` / ``TypeError``.

    NOTE(review): the Supabase and OpenAI calls below are not awaited, so
    they presumably block the event loop while running — confirm whether
    that is acceptable for the caller.

    Args:
        user_id: Value matched against the ``user_id`` column.
        email: Value matched against the ``email`` column.
        message_id: Value matched against the ``message_id`` column.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
    """
    response = (
        supabase.table("document_ai_entities")
        .select("line_item_description")
        .eq("user_id", user_id)
        .eq("email", email)
        .eq("message_id", message_id)
        .execute()
    )
    print("printing response.data")
    print(response.data)

    rows = response.data or []
    if not rows:
        print("no document_ai_entities row found; skipping categorization")
        return

    items = rows[0].get('line_item_description', None)
    print("items", items)
    if not items:
        print("no line items to categorize; skipping")
        return

    # The prompt is built by string concatenation, so hand it text even when
    # the column comes back as parsed JSON (list/dict).
    serialized_items = items if isinstance(items, str) else json.dumps(items, ensure_ascii=False)
    prompt = prompt_for_categorization(serialized_items)

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": prompt
            }
        ],
        response_format={ "type": "json_object" }
    )
    print("Printing GPT response")
    reply = completion.choices[0].message
    print(reply)

    # Parse before touching the database so a malformed reply never results
    # in a partial update.
    categorised = json.loads(reply.content)
    (
        supabase.table("document_ai_entities")
        .update({"categorised_data": categorised})
        .eq("user_id", user_id)
        .eq('email', email)
        .eq('message_id', message_id)
        .execute()
    )