File size: 4,271 Bytes
6170e72
177481a
651864f
5401162
6170e72
 
 
177481a
6170e72
 
 
3ae2f7d
 
 
 
 
 
96bd214
 
3ae2f7d
 
 
 
 
 
 
96bd214
3ae2f7d
 
 
 
 
 
 
 
 
96bd214
3ae2f7d
 
 
 
 
6170e72
4060422
 
 
 
 
 
 
 
 
 
 
 
 
 
6170e72
4060422
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b84d7c9
 
4060422
 
6170e72
 
 
 
586415d
b6b7d1d
6170e72
 
7ef0bc5
 
600c02e
586415d
 
 
b4c566e
95d83d5
586415d
 
 
 
 
 
 
 
 
 
 
 
ae32c4e
 
6950544
ae32c4e
 
 
 
 
6170e72
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import json
import os
from itertools import zip_longest

from openai import Client
from supabase import create_client
# Supabase connection settings are read from the environment; either value
# is None when the corresponding variable is not set.
url: str = os.environ.get('SUPABASE_URL')
key: str = os.environ.get('SUPABASE_KEY')

# Shared Supabase client used by the functions in this module.
supabase = create_client(url, key)

# Shared OpenAI client; API key and organization also come from the environment.
client = Client(
    api_key=os.environ.get('OPENAI_API_KEY'),
    organization=os.environ.get('ORG_ID'),
)

def _item_description(item):
    """Return the ``mention_text`` of a line-item entry, or None if unavailable."""
    if isinstance(item, dict):
        return item.get("mention_text")
    return None


def _item_value(value):
    """Return the ``normalizedValue`` -> ``text`` of a value entry, or None if unavailable."""
    if not isinstance(value, dict):
        return None
    normalized = value.get("normalizedValue")
    if isinstance(normalized, dict):
        return normalized.get("text")
    return None


def combine_json_arrays(items, values):
    """Pair item descriptions with item values, position by position.

    The two sequences are walked in lockstep. When one is longer than the
    other, the extra entries are still emitted, with None filling the side
    that has no counterpart.

    Args:
        items: Sequence of entries; a dict entry contributes its
            ``"mention_text"``. May be None or empty.
        values: Sequence of entries; a dict entry contributes
            ``["normalizedValue"]["text"]``. May be None or empty.

    Returns:
        A list of ``{"item_description": ..., "item_value": ...}`` dicts, one
        per position of the longer input. A field is None when its entry is
        missing, is not a dict, or lacks the expected key.
    """
    # ``or []`` lets callers pass None for a column that was not fetched.
    return [
        {
            "item_description": _item_description(item),
            "item_value": _item_value(value),
        }
        for item, value in zip_longest(items or [], values or [])
    ]


def prompt_for_categorization(data):
    """Build the prompt asking the model to categorize fashion line items.

    Args:
        data: The line items to categorize. A string is embedded verbatim;
            any other value (for example a list of dicts) is serialized with
            ``json.dumps`` first, because concatenating a non-string onto the
            prompt text would raise ``TypeError``.

    Returns:
        The complete prompt: instructions, the input data, and an example of
        the expected output JSON.
    """
    if not isinstance(data, str):
        data = json.dumps(data, ensure_ascii=False)

    # The example has to be a well-formed JSON object: the caller in this file
    # requests ``json_object`` output, so a malformed example cannot be followed
    # literally. Categorized items are listed under the "items" key.
    # NOTE(review): confirm readers of the stored result expect this shape.
    output_json = """
    {
    "items": [
    {
    "item_description":"HAND CHARM NECKLACE",
    "metadata":{
    "Tops": [],
    "Bottoms": [],
    "Footwear": [],
    "Accessories": ["Jewelry"],
    "Activewear": [],
    "Others": []
    }
    },
    {
    "item_description":"Jersey Top Slim fit",
    "metadata":{
    "Tops": ["T-Shirts"],
    "Bottoms": [],
    "Footwear": [],
    "Accessories": [],
    "Activewear": [],
    "Others": []
    }
    }
    ]
    }
    """
    instructions = """ You will act as a fashion assistant where you will be provided with a JSON array of fashion items, each containing an "item_description" and an "item_value". Your task is to strictly categorize each item into predefined fashion categories and subcategories:
    - Tops:T-Shirts, Shirts, Blouses, Tank Tops, Sweaters ,Coats, Jackets, Raincoats, Blazers, Hoodies, Sweatshirts
    - Bottoms: Pants, Jeans, Shorts, Skirts
    - Footwear: Sneakers, Sandals, Heels , Socks
    - Accessories:Belts, Hats, Bags, Scarves, Jewelry, Watches
    - Activewear: Sports Bras, Yoga Pants, Athletic Shorts
    - Classify all the other categories as Others .

    You will be given an input json as follows : \n"""
    closing = """\n Based on the above data you have to strictly generate output json and return only and only the output json as given :\n"""

    system_prompt = instructions + data + closing + output_json

    return system_prompt

async def extract_fashion_categories(user_id: str, email: str, message_id: str) -> None:
    """Categorize one message's line items and store the result.

    Reads ``line_item_description`` from the ``document_ai_entities`` rows
    matching ``user_id``, ``email`` and ``message_id``, asks the chat model to
    categorize the items of the first row, and writes the parsed JSON reply to
    the ``categorised_data`` column of the matching rows.

    Args:
        user_id: Value matched against the ``user_id`` column.
        email: Value matched against the ``email`` column.
        message_id: Value matched against the ``message_id`` column.

    Returns:
        None. When no row matches, or the row has no line items, nothing is
        sent to the model and nothing is written.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
    """
    # NOTE(review): nothing in this coroutine is awaited, so the Supabase and
    # OpenAI calls presumably block the event loop while they run - confirm.
    response = (
        supabase.table("document_ai_entities")
        .select("line_item_description")
        .eq("user_id", user_id)
        .eq("email", email)
        .eq("message_id", message_id)
        .execute()
    )
    print("printing response.data")
    print(response.data)

    # Guard against an empty result instead of failing on response.data[0].
    if not response.data:
        print("No document_ai_entities row found, skipping categorization")
        return

    items = response.data[0].get("line_item_description")
    print("items", items)
    if not items:
        print("No line items to categorize, skipping categorization")
        return

    prompt = prompt_for_categorization(items)
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
        # JSON mode: the reply content is parsed with json.loads below.
        response_format={"type": "json_object"},
    )
    print("Printing GPT response")
    print(completion.choices[0].message)

    categorised = json.loads(completion.choices[0].message.content)
    (
        supabase.table("document_ai_entities")
        .update({"categorised_data": categorised})
        .eq("user_id", user_id)
        .eq("email", email)
        .eq("message_id", message_id)
        .execute()
    )