import pickle

import numpy as np

from src.cocktails.utilities.cocktail_utilities import get_profile, profile_keys
from src.cocktails.utilities.ingredients_utilities import extract_ingredients, ingredient_list, ingredient_profiles
from src.cocktails.utilities.glass_and_volume_utilities import glass_volume, volume_ranges

# Volumes (in mL) used for informal bar measures.
one_dash = 1
one_splash = 6
one_tablespoon = 15
one_barspoon = 5
# Maximum fraction of a glass's volume that a drink may fill (see test_datapoint).
fill_rate = 0.8

# Multiplicative factor converting one unit of each measure into mL.
quantity_factors = {
    'ml': 1,
    'cl': 10,
    'splash': one_splash,
    'splashes': one_splash,
    'dash': one_dash,
    'dashes': one_dash,
    'spoon': one_barspoon,
    'spoons': one_barspoon,
    'tablespoon': one_tablespoon,
    'barspoons': one_barspoon,
    'barspoon': one_barspoon,
    'bar spoons': one_barspoon,
    'bar spoon': one_barspoon,
    'tablespoons': one_tablespoon,
    'teaspoon': 5,
    'teaspoons': 5,
    'drop': 0.05,
    'drops': 0.05,
}

# Unit names ordered from longest to shortest -- presumably so that a longer
# unit (e.g. 'tablespoon') is matched before a unit it contains ('spoon');
# confirm against the parsing code that consumes this list.
# NOTE: 'quantitiy_keys' is misspelled but kept, as other modules may import it.
quantitiy_keys = sorted(quantity_factors.keys())
indexes_keys = np.flip(np.argsort([len(k) for k in quantitiy_keys]))
quantity_factors_keys = list(np.array(quantitiy_keys)[indexes_keys])

keys_to_track = ['names', 'urls', 'glass', 'garnish', 'recipe', 'how_to', 'review', 'taste_rep', 'valid']
keys_to_add = ['category', 'subcategory', 'ingredients_str', 'ingredients', 'quantities', 'to_keep']
keys_to_update = ['glass']
keys_for_csv = ['names', 'category', 'subcategory', 'ingredients_str', 'urls', 'glass', 'garnish', 'how_to', 'review', 'taste_rep'] + profile_keys

# Substring replacements; judging by the names, applied to quantity strings and
# ingredient names respectively (the code applying them is not in this module).
to_replace_q = {' fresh': ''}
to_replace_ing = {'maple syrup': 'honey syrup', 'agave syrup': 'honey syrup', 'basil': 'mint'}

# Juice volume (mL) assumed for one whole fruit; used to convert wedges/slices.
_FRUIT_JUICE_VOLUMES = {
    'orange juice': 70,
    'lime juice': 30,
    'lemon juice': 45,
    'pineapple juice': 140,
}


def _require(condition, *message):
    """Raise AssertionError(*message) when `condition` is falsy.

    Used in place of `assert` so that the checks still run under `python -O`,
    while raising the same exception type as an `assert` statement would.
    """
    if not condition:
        raise AssertionError(*message)


def _total_quantity(ingredients, quantities, names):
    """Return the summed quantity of every ingredient whose name is in `names`."""
    return np.sum([q for ing, q in zip(ingredients, quantities) if ing in names])


def print_recipe(unit='mL', ingredient_str=None, ingredients=None, quantities=None, name='', cat='', to_print=True):
    """Format a recipe as a human-readable string and optionally print it.

    Provide either `ingredient_str` (parsed with `extract_ingredients`) or the
    two parallel lists `ingredients` and `quantities`, not both.

    Args:
        unit: 'mL' or 'oz'. With 'oz', generic quantities are multiplied by
            0.033814 and rounded to 3 decimals.
        ingredient_str: recipe string handed to `extract_ingredients`.
        ingredients: ingredient names (used when `ingredient_str` is None).
        quantities: quantities matching `ingredients`.
        name: recipe name appended to the header when not '' and not None.
        cat: category appended in parentheses to the header when not ''.
        to_print: when True, the formatted string is also printed.

    Returns:
        The formatted recipe string.

    Raises:
        AssertionError: if the argument combination is invalid.
        ValueError: if an 'egg' ingredient is present and `unit` is neither
            'mL' nor 'oz'.

    The caller's `ingredients` / `quantities` are not modified (the previous
    implementation overwrote them in place, so formatting the same lists twice
    applied the conversions twice).
    """
    usage = 'provide either ingredient_str, or list ingredients and quantities'
    if ingredient_str is None:
        _require(len(ingredients) == len(quantities), usage)
    else:
        _require(ingredients is None and quantities is None, usage)
        ingredients, quantities = extract_ingredients(ingredient_str)

    parts = ['\nRecipe:']
    if name != '' and name is not None:
        parts.append(f' {name}')
    if cat != '':
        parts.append(f' ({cat})')
    parts.append('\n')

    for ingredient, quantity in zip(ingredients, quantities):
        if ingredient == 'egg':
            # Eggs are always displayed as a single egg white.
            quantity = 1
            ingredient = 'egg white'
            if unit == 'mL':
                quantifier = ' (30 mL)'
            elif unit == 'oz':
                quantifier = ' (1 fl oz)'
            else:
                raise ValueError
        elif ingredient in ['angostura', 'orange bitters']:
            # Bitters are displayed as a whole number of dashes (at least one).
            quantity = max(1, int(quantity / 0.6))
            quantifier = ' dashes' if quantity > 1 else ' dash'
        elif ingredient == 'mint':
            quantifier = ' leaves' if quantity > 1 else ' leaf'
        elif unit == "oz":
            quantity = float(f"{quantity * 0.033814:.3f}")  # convert to fl oz
            quantifier = ' fl oz'
        else:
            quantifier = ' mL'
        parts.append(f' {quantity}{quantifier} - {ingredient}\n')

    str_out = ''.join(parts)
    if to_print:
        print(str_out)
    return str_out


def test_datapoint(datapoint, category, ingredients, quantities):
    """Validate a scraped recipe and normalise its ingredient/quantity lists.

    `ingredients` and `quantities` are modified IN PLACE and also returned:
    quantities are scaled down when the drink's end volume exceeds
    `fill_rate` times the glass volume, and a single allowed duplicated
    ingredient is merged into one entry.

    Args:
        datapoint: mapping read here through its 'glass', 'how_to' and
            'recipe' keys ('recipe'[1] is iterated as ingredient strings).
        category: passed through to `get_profile`.
        ingredients: list of ingredient names, each present in `ingredient_list`.
        quantities: list of quantities matching `ingredients`.

    Returns:
        The (ingredients, quantities) pair after normalisation.

    Raises:
        AssertionError: when any validity check fails.
        ValueError: when an ingredient is not in `ingredient_list`.
    """
    ingredient_indexes = [ingredient_list.index(ing) for ing in ingredients]
    profile = get_profile(category, ingredients, quantities)
    volume = profile['end volume']
    alcohol = profile['end alcohol']
    acid = profile['end acid']
    sugar = profile['end sugar']

    # Scale the recipe down if it would overfill the glass.
    glass = datapoint['glass']
    if glass is not None:
        max_volume = glass_volume[glass] * fill_rate
        if volume > max_volume:
            ratio = max_volume / volume
            for i_q, quantity in enumerate(quantities):
                quantities[i_q] = float(f'{quantity * ratio:.2f}')

    # Profile checks (computed before any rescaling above).
    _require(alcohol < 30, 'too boozy')
    # Fixed: this check used to read `alcohol < 5`, contradicting its message.
    _require(alcohol > 5, 'not boozy enough')
    _require(acid < 2, 'too much acid')
    _require(sugar < 20, 'too much sugar')
    _require(len(ingredients) > 1, 'only one ingredient')

    # Merge a single duplicated ingredient if it belongs to the allowed list.
    if len(set(ingredients)) != len(ingredients):
        seen = set()
        i_doubles = []
        for i, ing in enumerate(ingredients):
            if ing in seen:
                i_doubles.append(i)
            else:
                seen.add(ing)
        ingredient_double_ok = ['mint', 'cointreau', 'lemon juice', 'cuban rum', 'double syrup']
        _require(
            len(i_doubles) == 1 and ingredients[i_doubles[0]] in ingredient_double_ok,
            f'double ingredient, not {ingredient_double_ok}',
        )
        ing_double = ingredients[i_doubles[0]]
        double_q = _total_quantity(ingredients, quantities, [ing_double])
        ingredients.pop(i_doubles[0])
        quantities.pop(i_doubles[0])
        quantities[ingredients.index(ing_double)] = double_q

    # Per-ingredient quantity limits.
    lemon_lime_q = _total_quantity(ingredients, quantities, ['lime juice', 'lemon juice'])
    _require(lemon_lime_q <= 45, 'too much lemon and lime')
    salt_q = _total_quantity(ingredients, quantities, ['salt'])
    _require(salt_q <= 8, 'too much salt')
    bitter_q = _total_quantity(ingredients, quantities, ['angostura', 'orange bitters'])
    _require(bitter_q <= 5 * one_dash, 'too much bitter')
    absinthe_q = _total_quantity(ingredients, quantities, ['absinthe'])
    if absinthe_q > 4 * one_dash:
        # Beyond a few dashes, absinthe must stay under half of the mix (mint excluded).
        mix_volume = np.sum([q for ing, q in zip(ingredients, quantities) if ing != 'mint'])
        _require(absinthe_q < 0.5 * mix_volume, 'filter absinthe glasses')

    # Reject recipes mentioning heat in the instructions or raw ingredient
    # strings, unless the instructions contain 'shot' (which itself contains 'hot').
    mentions_heat = any(
        w in datapoint['how_to'] or any(w in ing.lower() for ing in datapoint['recipe'][1])
        for w in ['warm', 'boil', 'hot']
    )
    _require(not (mentions_heat and 'shot' not in datapoint['how_to']))

    water_q = _total_quantity(ingredients, quantities, ['water'])
    _require(water_q < 40)

    # At most three liqueurs/vermouths. The indexes were computed before any
    # duplicate merge above, so a merged duplicate is still counted twice here.
    # (A stricter liqueur-only limit of two existed as commented-out code.)
    n_liqueur_and_vermouth = np.sum(
        [ingredient_profiles['type'][i].lower() in ['liqueur', 'vermouth'] for i in ingredient_indexes]
    )
    _require(n_liqueur_and_vermouth <= 3)
    return ingredients, quantities


def run_battery_checks_difford(datapoint, category, ingredients, quantities):
    """Run `test_datapoint` and report failure through a flag, not an exception.

    Returns:
        (flag, ingredients, quantities). On success `flag` is False and the
        lists are those returned by `test_datapoint`. On failure `flag` is
        True, both lists are None, and the datapoint's "names" and "urls"
        entries are printed.
    """
    try:
        ingredients, quantities = test_datapoint(datapoint, category, ingredients, quantities)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit propagate.
        print(datapoint["names"])
        print(datapoint["urls"])
        return True, None, None
    return False, ingredients, quantities


def _parse_count(token):
    """Parse a count written as a number or as a 'a⁄b' fraction (U+2044 slash)."""
    pieces = token.split('⁄')
    if len(pieces) > 1:
        return float(pieces[0]) / float(pieces[1])
    return float(token)


def _fruit_pieces_to_ml(q, ing_scrubbed, share_of_fruit):
    """Convert '<count> wedge/slice' of a fruit into a '<volume> ml' string."""
    factor = _parse_count(q.split(' ')[0]) * share_of_fruit
    if ing_scrubbed not in _FRUIT_JUICE_VOLUMES:
        # Previously this surfaced as an UnboundLocalError on `vol`.
        raise ValueError(f'no juice volume known for {ing_scrubbed!r} (quantity {q!r})')
    return f'{factor * _FRUIT_JUICE_VOLUMES[ing_scrubbed]:.2f} ml'


def tambouille(q, ingredients_scrubbed, quantities_scrubbed, cat):
    """Rewrite special-case quantity strings into plain 'ml' / 'drops' strings.

    A set of hand-written special cases (acknowledged as "ugly" in the original
    code). The ingredient concerned is
    `ingredients_scrubbed[len(quantities_scrubbed)]`, i.e. the first ingredient
    that has no scrubbed quantity yet.

    Args:
        q: raw quantity string (e.g. '2 wedge', 'top up with', '1 pinch').
        ingredients_scrubbed: ingredient names of the recipe.
        quantities_scrubbed: quantities already resolved for the leading
            ingredients.
        cat: key into `volume_ranges`, used by the 'top up with' case.

    Returns:
        The rewritten quantity string, or `q` unchanged if no case applies.

    Raises:
        ValueError: if a count cannot be parsed as a number, or a wedge/slice
            is given for an ingredient without a known juice volume.
    """
    ing_scrubbed = ingredients_scrubbed[len(quantities_scrubbed)]

    if q == '4 cube' and ing_scrubbed == 'pineapple juice':
        return '20 ml'
    if 'top up with' in q:
        # Fill up to the middle of the category's volume range, with at least 15 ml.
        volume_so_far = np.sum(
            [quantity for ing, quantity in zip(ingredients_scrubbed, quantities_scrubbed) if ing != 'mint']
        )
        volume_mix = np.sum(volume_ranges[cat]) / 2
        remaining = volume_mix - volume_so_far
        if remaining < 15:
            return '15 ml'
        return str(int(remaining)) + ' ml'
    if q == '1 pinch' and ing_scrubbed == 'salt':
        return '2 drops'
    if 'cube' in q and ing_scrubbed == 'double syrup':
        # 2g per cube, 1.7 is ratio solid / syrup
        return f'{float(q.split(" ")[0]) * 2 * 1.7:.2f} ml'
    if 'wedge' in q:
        # consider a wedge to be 0.15*the fruit.
        return _fruit_pieces_to_ml(q, ing_scrubbed, 0.15)
    if 'slice' in q:
        # consider a slice to be 0.1*the fruit.
        return _fruit_pieces_to_ml(q, ing_scrubbed, 0.1)
    if q == '1 whole' and ing_scrubbed == 'luxardo maraschino':
        return '10 ml'
    if ing_scrubbed == 'egg' and 'ml' not in q:
        return f'{float(q) * 30:.2f} ml'  # 30 ml per egg
    return q


def compute_eucl_dist(a, b):
    """Return the Euclidean distance between `a` and `b`."""
    return np.sqrt(np.sum((a - b)**2))


def evaluate_with_quadruplets(representations, strategy='all'):
    """Score representations on the pickled "close pair vs. far pairs" entries.

    The pickle at QUADRUPLETS_PATH is loaded as a dict whose values are
    indexed as: [0] a strategy label, [1] a "close" pair of keys into
    `representations`, [2] a sequence of "far" pairs of keys. An entry scores
    True when the close pair is strictly nearer (Euclidean distance) than
    every far pair. Entries whose close element does not have length 2 are
    skipped.

    Args:
        representations: indexable by the keys stored in the entries.
        strategy: 'all' to use every entry, otherwise only entries whose
            label equals `strategy`.

    Returns:
        The mean of the per-entry scores, or `np.nan` when no entry was scored.
    """
    # NOTE(review): QUADRUPLETS_PATH is not defined or imported in this module;
    # it must be provided elsewhere or this call raises NameError.
    # SECURITY: pickle.load executes arbitrary code from the file; only load
    # trusted data.
    with open(QUADRUPLETS_PATH, 'rb') as f:
        data = pickle.load(f)
    entries = list(data.values())

    if strategy == 'all':
        quadruplets = [d[1:] for d in entries]
    else:
        quadruplets = [d[1:] for d in entries if d[0] == strategy]

    scores = []
    for q in quadruplets:
        close = q[0]
        if len(close) != 2:
            continue
        far = q[1]
        distance_close = compute_eucl_dist(representations[close[0]], representations[close[1]])
        distances_far = [compute_eucl_dist(representations[pair[0]], representations[pair[1]]) for pair in far]
        scores.append(distance_close < np.min(distances_far))

    if not scores:
        return np.nan
    return np.mean(scores)