Spaces:

Omkar008
/

categorize_fashion_data

Running

App Files Files Community

categorize_fashion_data / extract_insights.py

Omkar008

Update extract_insights.py

5401162 verified about 2 months ago

raw

history blame contribute delete

4.27 kB

	from openai import Client
	from supabase import create_client
	import os
	import json
	# Supabase connection: the endpoint URL and API key are read from the environment.
	url: str = os.environ.get('SUPABASE_URL')
	key: str = os.environ.get('SUPABASE_KEY')
	supabase = create_client(url, key)

	# OpenAI client, configured with the API key and organisation id from the environment.
	client = Client(
	    api_key=os.environ.get('OPENAI_API_KEY'),
	    organization=os.environ.get('ORG_ID'),
	)

	def combine_json_arrays(items, values):
	    """Pair line-item descriptions with their amounts, position by position.

	    Args:
	        items: Iterable of description entities. A dict entry contributes its
	            ``"mention_text"``; ``None`` is treated as an empty input.
	        values: Iterable of amount entities. A dict entry contributes
	            ``entry["normalizedValue"]["text"]``; ``None`` is treated as an
	            empty input.

	    Returns:
	        A list of ``{"item_description": ..., "item_value": ...}`` dicts, one
	        per position of the longer input. A field is ``None`` when the other
	        input ran out or when the entry does not have the expected shape.
	    """
	    # Local import keeps this function self-contained; the module's import
	    # header does not bring in itertools.
	    from itertools import zip_longest

	    items = [] if items is None else items
	    values = [] if values is None else values

	    combined = []
	    # zip_longest pads the shorter input with None, which falls through the
	    # isinstance checks below exactly like any other malformed entry.
	    for item, value in zip_longest(items, values):
	        description = item.get("mention_text") if isinstance(item, dict) else None
	        normalized = value.get("normalizedValue") if isinstance(value, dict) else None
	        amount = normalized.get("text") if isinstance(normalized, dict) else None
	        combined.append({"item_description": description, "item_value": amount})

	    return combined


	def prompt_for_categorization(data):
	    """Build the categorisation prompt sent to the chat model.

	    Args:
	        data: The fashion items to categorise. A ``str`` is embedded in the
	            prompt as-is; any other value (for example a list of dicts) is
	            serialised with ``json.dumps`` first, because the prompt is
	            assembled by string concatenation.

	    Returns:
	        The full prompt text: instructions and category list, the input
	        data, and an example of the expected output.
	    """
	    if not isinstance(data, str):
	        data = json.dumps(data, ensure_ascii=False)

	    # NOTE(review): this example is not valid JSON (two objects nested in an
	    # object without keys) and spells "T Shirts" where the category list
	    # below says "T-Shirts". It is kept verbatim because changing the
	    # prompt changes what the model returns - confirm before editing.
	    output_json = """
	{
	{
	"item_description":"HAND CHARM NECKLACE",
	"metadata":{
	"Tops": [],
	"Bottoms": [],
	"Footwear": [],
	"Accessories": ["Jewelry"],
	"Activewear": [],
	"Others": []
	}
	},

	{
	"item_description":"Jersey Top Slim fit",
	"metadata":{
	"Tops": ["T Shirts"],
	"Bottoms": [],
	"Footwear": [],
	"Accessories": [],
	"Activewear": [],
	"Others": []
	}
	}
	}
	"""
	    system_prompt = """ You will act as a fashion assistant where you will be provided with a JSON array of fashion items, each containing an "item_description" and an "item_value". Your task is to categorize strictly categorize each item into predefined fashion categories and subcategories:
	- Tops:T-Shirts, Shirts, Blouses, Tank Tops, Sweaters ,Coats, Jackets, Raincoats, Blazers, Hoodies, Sweatshirts
	- Bottoms: Pants, Jeans, Shorts, Skirts
	- Footwear: Sneakers, Sandals, Heels , Socks
	- Accessories:Belts, Hats, Bags, Scarves, Jewelry, Watches
	- Activewear: Sports Bras, Yoga Pants, Athletic Shorts
	- Classify all the other categories as Others .

	You will be given an input json as follows : \n""" + data + """\n Based on the above data you have to strictly generate output json and return only and only the output json as given :\n""" + output_json

	    return system_prompt

	async def extract_fashion_categories(user_id: str, email: str, message_id: str):
	    """Categorise one message's line items with the chat model and store the result.

	    Reads ``line_item_description`` from the ``document_ai_entities`` rows
	    matching the given user, email and message, asks ``gpt-4o-mini`` to
	    categorise the items, and writes the parsed JSON reply to the
	    ``categorised_data`` column of the same rows.

	    Args:
	        user_id: Value matched against the ``user_id`` column.
	        email: Value matched against the ``email`` column.
	        message_id: Value matched against the ``message_id`` column.

	    Returns:
	        None. Nothing is sent to the model or written back when no row
	        matches or the first matching row has no line items.

	    Raises:
	        json.JSONDecodeError: If the model's reply is not valid JSON.
	    """
	    # NOTE(review): none of the client calls below are awaited, so each one
	    # blocks this coroutine until it completes.
	    response = (
	        supabase.table("document_ai_entities")
	        .select("line_item_description")
	        .eq("user_id", user_id)
	        .eq("email", email)
	        .eq("message_id", message_id)
	        .execute()
	    )
	    print("printing response.data")
	    print(response.data)

	    rows = response.data
	    if not rows:
	        # Indexing an empty result used to fail with IndexError.
	        print("No document_ai_entities row found for message_id", message_id)
	        return

	    # Only line_item_description is selected above, so amounts are not
	    # available here; extend the select (and see combine_json_arrays) if
	    # item values are needed.
	    items = rows[0].get('line_item_description')
	    print("items",items)
	    if not items:
	        print("No line items to categorise for message_id", message_id)
	        return

	    # The prompt is assembled by string concatenation, so a structured
	    # (non-str) column value is serialised before it is handed over.
	    data = items if isinstance(items, str) else json.dumps(items, ensure_ascii=False)
	    prompt = prompt_for_categorization(data)

	    completion = client.chat.completions.create(
	        model="gpt-4o-mini",
	        messages=[
	            {
	                "role": "user",
	                "content": prompt
	            }
	        ],
	        response_format={ "type": "json_object" }
	    )
	    print("Printing GPT response")
	    print(completion.choices[0].message)

	    categorised = json.loads(completion.choices[0].message.content)
	    (
	        supabase.table("document_ai_entities")
	        .update({"categorised_data": categorised})
	        .eq("user_id", user_id)
	        .eq('email',email)
	        .eq('message_id',message_id)
	        .execute()
	    )