# Spaces: Omkar008 / categorize_fashion_data (Running)
# File size: 4,271 Bytes

import json
import os
from itertools import zip_longest

from openai import Client
from supabase import create_client
# Initialize Supabase client from the SUPABASE_URL / SUPABASE_KEY environment
# variables.
# NOTE(review): os.getenv returns None for an unset variable, so the `str`
# annotations below are optimistic — confirm both variables are always set
# wherever this module is imported.
url: str = os.getenv('SUPABASE_URL')
key: str = os.getenv('SUPABASE_KEY')
supabase = create_client(url, key)

# Module-level OpenAI client; the API key and organization are read from the
# OPENAI_API_KEY and ORG_ID environment variables at import time.
client = Client(api_key=os.getenv('OPENAI_API_KEY'),organization=os.getenv('ORG_ID'))

def combine_json_arrays(items, values):
    """Pair item descriptions with item values position by position.

    Args:
        items: Sequence of entries; a dict entry contributes its
            ``"mention_text"`` as the description. ``None`` is treated as an
            empty sequence.
        values: Sequence of entries; a dict entry contributes
            ``entry["normalizedValue"]["text"]`` as the value. ``None`` is
            treated as an empty sequence.

    Returns:
        A list of ``{"item_description": ..., "item_value": ...}`` dicts, one
        per position of the longer input. A field is ``None`` when the
        corresponding entry is missing, is not a dict, or lacks the expected
        key.
    """

    def _description(item):
        # Non-dict entries (including the zip_longest fill value) carry no text.
        return item.get("mention_text") if isinstance(item, dict) else None

    def _amount(value):
        if not isinstance(value, dict):
            return None
        normalized = value.get("normalizedValue")
        # Guard against a missing, empty or non-dict "normalizedValue".
        return normalized.get("text") if isinstance(normalized, dict) else None

    # zip_longest pads the shorter sequence with None, which covers the
    # "leftover items" and "leftover values" cases in a single pass.
    return [
        {"item_description": _description(item), "item_value": _amount(value)}
        for item, value in zip_longest(items or [], values or [])
    ]


def prompt_for_categorization(data):
    """Build the categorization prompt sent to the chat model.

    Args:
        data: The fashion items to categorize. A ``str`` is embedded in the
            prompt as-is; any other value (list, dict, ``None``) is first
            serialized with ``json.dumps`` so the concatenation cannot fail
            with ``TypeError``.

    Returns:
        The full prompt: instructions and category list, the input data, and
        an example of the expected output.

    Raises:
        TypeError: If ``data`` is neither a string nor JSON-serializable.
    """
    if not isinstance(data, str):
        data = json.dumps(data, ensure_ascii=False)

    # NOTE(review): this example is not valid JSON as written (objects nested
    # directly inside an object without keys). It is kept verbatim so the
    # prompt the model sees does not change — confirm the intended shape.
    output_json = """
    {
    {
    "item_description":"HAND CHARM NECKLACE",
    "metadata":{
    "Tops": [],
    "Bottoms": [],
    "Footwear": [],
    "Accessories": ["Jewelry"],
    "Activewear": [],
    "Others": []
    }
    },
    
    {
    "item_description":"Jersey Top Slim fit",
    "metadata":{
    "Tops": ["T Shirts"],
    "Bottoms": [],
    "Footwear": [],
    "Accessories": [],
    "Activewear": [],
    "Others": []
    }
    }
    }
    """
    system_prompt = (
        """ You will act as a fashion assistant where you will be provided with a JSON array of fashion items, each containing an "item_description" and an "item_value". Your task is to categorize strictly categorize each item into predefined fashion categories and subcategories:
    - Tops:T-Shirts, Shirts, Blouses, Tank Tops, Sweaters ,Coats, Jackets, Raincoats, Blazers, Hoodies, Sweatshirts
    - Bottoms: Pants, Jeans, Shorts, Skirts
    - Footwear: Sneakers, Sandals, Heels , Socks
    - Accessories:Belts, Hats, Bags, Scarves, Jewelry, Watches
    - Activewear: Sports Bras, Yoga Pants, Athletic Shorts
    - Classify all the other categories as Others .

    You will be given an input json as follows : \n"""
        + data
        + """\n Based on the above data you have to strictly generate output json and return only and only the output json as given :\n"""
        + output_json
    )

    return system_prompt

async def extract_fashion_categories(user_id: str, email: str, message_id: str) -> None:
    """Categorize one message's line items and store the result.

    Reads ``line_item_description`` from the ``document_ai_entities`` row
    matching ``user_id``, ``email`` and ``message_id``, asks the chat model to
    categorize the items, and writes the parsed JSON reply to that row's
    ``categorised_data`` column.

    If no row matches, or the row has no line items, the function prints a
    message and returns without calling the model or updating the table.

    NOTE(review): nothing in this coroutine is awaited; the Supabase and
    OpenAI calls look synchronous and would block the event loop while they
    run — confirm whether that is acceptable for the caller.

    Args:
        user_id: Value matched against the ``user_id`` column.
        email: Value matched against the ``email`` column.
        message_id: Value matched against the ``message_id`` column.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
    """
    response = (
        supabase.table("document_ai_entities")
        .select("line_item_description")
        .eq("user_id", user_id)
        .eq("email", email)
        .eq("message_id", message_id)
        .execute()
    )
    print("printing response.data")
    print(response.data)

    # Guard against an empty result set instead of failing on response.data[0].
    if not response.data:
        print("No document_ai_entities row found; skipping categorization")
        return

    items = response.data[0].get("line_item_description")
    print("items", items)
    if not items:
        print("No line items to categorize; skipping categorization")
        return

    # The prompt is built by string concatenation, so make sure the items are
    # a string whatever type the column came back as.
    serialized_items = (
        items if isinstance(items, str) else json.dumps(items, ensure_ascii=False)
    )
    prompt = prompt_for_categorization(serialized_items)

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        response_format={"type": "json_object"},
    )
    print("Printing GPT response")
    print(completion.choices[0].message)

    # Parse before touching the table so a malformed reply never reaches it.
    categorised_data = json.loads(completion.choices[0].message.content)
    (
        supabase.table("document_ai_entities")
        .update({"categorised_data": categorised_data})
        .eq("user_id", user_id)
        .eq("email", email)
        .eq("message_id", message_id)
        .execute()
    )