File size: 10,257 Bytes
93c029f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import numpy as np
import pickle
from src.cocktails.utilities.cocktail_utilities import get_profile, profile_keys
from src.cocktails.utilities.ingredients_utilities import extract_ingredients, ingredient_list, ingredient_profiles
from src.cocktails.utilities.glass_and_volume_utilities import glass_volume, volume_ranges

# Size of the informal bar measures, expressed in the same unit as the
# conversion table below (where 'ml' maps to 1 and 'cl' to 10).
one_dash = 1
one_splash = 6
one_tablespoon = 15
one_barspoon = 5

# Fraction of a glass' volume that a drink may occupy.
fill_rate = 0.8

# Multiplicative factor converting one unit of each measure to millilitres.
quantity_factors = {
    'ml': 1,
    'cl': 10,
    'splash': one_splash,
    'splashes': one_splash,
    'dash': one_dash,
    'dashes': one_dash,
    'spoon': one_barspoon,
    'spoons': one_barspoon,
    'tablespoon': one_tablespoon,
    'barspoons': one_barspoon,
    'barspoon': one_barspoon,
    'bar spoons': one_barspoon,
    'bar spoon': one_barspoon,
    'tablespoons': one_tablespoon,
    'teaspoon': 5,
    'teaspoons': 5,
    'drop': 0.05,
    'drops': 0.05,
}

# Unit names ordered from longest to shortest (presumably so that a longer
# name such as 'tablespoons' is tried before a name it contains, such as
# 'spoon' -- TODO confirm against the parsing code that consumes this list).
quantitiy_keys = sorted(quantity_factors)
indexes_keys = np.flip(np.argsort([len(unit_name) for unit_name in quantitiy_keys]))
quantity_factors_keys = list(np.array(quantitiy_keys)[indexes_keys])

# Field names used when processing scraped datapoints. How each list is
# consumed is not visible in this file -- presumably existing fields to carry
# over, fields to create, and fields to overwrite; TODO confirm against callers.
keys_to_track = ['names', 'urls', 'glass', 'garnish', 'recipe', 'how_to', 'review', 'taste_rep', 'valid']
keys_to_add = ['category', 'subcategory', 'ingredients_str', 'ingredients', 'quantities', 'to_keep']
keys_to_update = ['glass']
# Descriptive fields followed by every profile key (presumably the CSV column order -- TODO confirm).
keys_for_csv = ['names', 'category', 'subcategory', 'ingredients_str', 'urls', 'glass', 'garnish', 'how_to', 'review', 'taste_rep'] + profile_keys

# Text substitution tables (key -> replacement). Presumably applied to raw
# quantity strings and to ingredient names respectively -- verify against callers.
to_replace_q = {' fresh': ''}
to_replace_ing = {'maple syrup': 'honey syrup',
                  'agave syrup': 'honey syrup',
                  'basil': 'mint'}

def print_recipe(unit='mL', ingredient_str=None, ingredients=None, quantities=None, name='', cat='', to_print=True):
    """Render a recipe as a multi-line string and optionally print it.

    The recipe is given either as ``ingredient_str`` (parsed with
    ``extract_ingredients``) or as the two aligned sequences ``ingredients``
    and ``quantities`` -- never both.

    Special cases in the rendering:
      * 'egg' is shown as one 'egg white' (30 mL / 1 fl oz);
      * 'angostura' and 'orange bitters' are shown in dashes (quantity / 0.6,
        truncated, at least 1);
      * 'mint' is shown in leaves;
      * everything else is shown in mL, or in fl oz when ``unit == 'oz'``.

    The caller's ``ingredients`` and ``quantities`` are never modified, so the
    function can safely be called several times on the same lists.

    Args:
        unit: 'mL' or 'oz'. Any other value is treated like 'mL', except for
            eggs where it raises ValueError.
        ingredient_str: recipe string understood by ``extract_ingredients``.
        ingredients: sequence of ingredient names.
        quantities: sequence of quantities aligned with ``ingredients``.
        name: optional cocktail name appended to the header ('' or None to omit).
        cat: optional category shown in parentheses ('' or None to omit).
        to_print: when True, also print the rendered string.

    Returns:
        The rendered recipe string.

    Raises:
        AssertionError: if the recipe arguments are missing, inconsistent in
            length, or given in both forms.
        ValueError: if an egg must be rendered with an unknown ``unit``.
    """
    usage = 'provide either ingredient_str, or list ingredients and quantities'
    if ingredient_str is None:
        assert ingredients is not None and quantities is not None and len(ingredients) == len(quantities), usage
    else:
        assert ingredients is None and quantities is None, usage
        ingredients, quantities = extract_ingredients(ingredient_str)

    header = '\nRecipe:'
    if name != '' and name is not None:
        header += f' {name}'
    if cat != '' and cat is not None:
        header += f' ({cat})'
    lines = [header + '\n']

    # Display values live in locals only: writing them back into the input
    # lists would corrupt the caller's data (e.g. converting to oz twice).
    for ingredient, quantity in zip(ingredients, quantities):
        if ingredient == 'egg':
            quantity = 1
            ingredient = 'egg white'
            if unit == 'mL':
                quantifier = ' (30 mL)'
            elif unit == 'oz':
                quantifier = ' (1 fl oz)'
            else:
                raise ValueError(f'unknown unit: {unit!r}')
        elif ingredient in ['angostura', 'orange bitters']:
            quantity = max(1, int(quantity / 0.6))
            quantifier = ' dash'
            if quantity > 1:
                quantifier += 'es'
        elif ingredient == 'mint':
            quantifier = ' leaves' if quantity > 1 else ' leaf'
        else:
            if unit == 'oz':
                quantity = float(f'{quantity * 0.033814:.3f}')  # convert to fl oz
                quantifier = ' fl oz'
            else:
                quantifier = ' mL'
        lines.append(f'   {quantity}{quantifier} - {ingredient}\n')

    str_out = ''.join(lines)
    if to_print:
        print(str_out)
    return str_out


def _require(condition, message=''):
    """Raise AssertionError(message) unless ``condition`` holds.

    Unlike a bare ``assert`` statement, this check is not removed when Python
    runs with optimisations enabled (``-O``).
    """
    if not condition:
        raise AssertionError(message)


def test_datapoint(datapoint, category, ingredients, quantities):
    """Run sanity checks on one scraped cocktail and normalise its recipe in place.

    Steps, in order:
      1. compute the cocktail profile with ``get_profile``;
      2. if the drink exceeds ``fill_rate`` times the volume of its glass,
         scale every quantity down so that it fits;
      3. check alcohol, acid and sugar levels and the number of ingredients;
      4. merge a single duplicated ingredient when it belongs to a short
         allow-list, reject any other duplication;
      5. check per-ingredient limits (citrus, salt, bitters, absinthe, water),
         reject drinks that mention heat, and cap liqueurs plus vermouths.

    Args:
        datapoint: mapping read through the keys 'glass', 'how_to' and
            'recipe' (``datapoint['recipe'][1]`` is iterated as strings).
        category: passed through to ``get_profile``.
        ingredients: list of ingredient names, all present in
            ``ingredient_list``. Modified in place.
        quantities: list of quantities aligned with ``ingredients``
            (presumably mL -- TODO confirm). Modified in place.

    Returns:
        The same ``ingredients`` and ``quantities`` lists, after rescaling and
        duplicate merging.

    Raises:
        AssertionError: when any check fails.
        ValueError: when an ingredient is missing from ``ingredient_list``.
    """
    # Resolving the indexes first also rejects unknown ingredients early.
    ingredient_indexes = [ingredient_list.index(ing) for ing in ingredients]
    profile = get_profile(category, ingredients, quantities)
    volume = profile['end volume']
    alcohol = profile['end alcohol']
    acid = profile['end acid']
    sugar = profile['end sugar']

    # check volume: shrink the recipe proportionally if it overflows the glass
    glass = datapoint['glass']
    if glass is not None:
        max_volume = fill_rate * glass_volume[glass]
        if volume > max_volume:
            ratio = max_volume / volume
            for i_q in range(len(quantities)):
                quantities[i_q] = float(f'{quantities[i_q] * ratio:.2f}')

    # check alcohol, acid, sugar
    _require(alcohol < 30, 'too boozy')
    # The original condition was `alcohol < 5`, which contradicts its own
    # message and rejects every drink that is boozy enough.
    # NOTE(review): assumes 'end alcohol' uses the same scale as the 30 bound above.
    _require(alcohol > 5, 'not boozy enough')
    _require(acid < 2, 'too much acid')
    _require(sugar < 20, 'too much sugar')
    _require(len(ingredients) > 1, 'only one ingredient')

    # merge or reject duplicated ingredients
    if len(set(ingredients)) != len(ingredients):
        seen = set()
        i_doubles = []
        for i, ing in enumerate(ingredients):
            if ing in seen:
                i_doubles.append(i)
            else:
                seen.add(ing)
        ingredient_double_ok = ['mint', 'cointreau', 'lemon juice', 'cuban rum', 'double syrup']
        mergeable = len(i_doubles) == 1 and ingredients[i_doubles[0]] in ingredient_double_ok
        _require(mergeable, f'double ingredient, not {ingredient_double_ok}')
        i_double = i_doubles[0]
        ing_double = ingredients[i_double]
        double_q = np.sum([q for ing, q in zip(ingredients, quantities) if ing == ing_double])
        # Drop the second occurrence and give the summed quantity to the first.
        ingredients.pop(i_double)
        quantities.pop(i_double)
        quantities[ingredients.index(ing_double)] = double_q

    def quantity_of(*names):
        """Total quantity of the ingredients whose name is in ``names``."""
        return np.sum([q for ing, q in zip(ingredients, quantities) if ing in names])

    _require(quantity_of('lime juice', 'lemon juice') <= 45, 'too much lemon and lime')
    _require(quantity_of('salt') <= 8, 'too much salt')
    _require(quantity_of('angostura', 'orange bitters') <= 5 * one_dash, 'too much bitter')

    absinthe_q = quantity_of('absinthe')
    if absinthe_q > 4 * one_dash:
        mix_volume = np.sum([q for ing, q in zip(ingredients, quantities) if ing != 'mint'])
        _require(absinthe_q < 0.5 * mix_volume, 'filter absinthe glasses')

    # Reject warm drinks; 'shot' is excluded because it contains 'hot'.
    how_to = datapoint['how_to']
    mentions_heat = any(
        w in how_to or any(w in ing.lower() for ing in datapoint['recipe'][1])
        for w in ['warm', 'boil', 'hot']
    )
    _require(not (mentions_heat and 'shot' not in how_to), 'warm drink')

    _require(quantity_of('water') < 40, 'too much water')

    # NOTE(review): ingredient_indexes predates the duplicate merge above, so a
    # merged ingredient is still counted twice here -- confirm this is intended.
    n_liqueur_and_vermouth = np.sum([ingredient_profiles['type'][i].lower() in ['liqueur', 'vermouth'] for i in ingredient_indexes])
    _require(n_liqueur_and_vermouth <= 3, 'too many liqueurs and vermouths')
    return ingredients, quantities

def run_battery_checks_difford(datapoint, category, ingredients, quantities):
    """Run ``test_datapoint`` on a recipe and report whether it was rejected.

    When any check fails, the datapoint's name and url are printed and the
    recipe is discarded.

    Args:
        datapoint: mapping for the cocktail; 'names' and 'urls' are read here,
            the rest is forwarded to ``test_datapoint``.
        category: forwarded to ``test_datapoint``.
        ingredients: list of ingredient names, forwarded to ``test_datapoint``.
        quantities: list of quantities, forwarded to ``test_datapoint``.

    Returns:
        ``(flag, ingredients, quantities)``: ``flag`` is True when the recipe
        was rejected, in which case both lists are None; otherwise the lists
        are those returned by ``test_datapoint``.
    """
    flag = False
    try:
        ingredients, quantities = test_datapoint(datapoint, category, ingredients, quantities)
    except Exception:
        # Any failed check (AssertionError) or lookup error rejects the recipe.
        # `Exception` rather than a bare `except` so that Ctrl-C and
        # SystemExit still interrupt a long scrubbing run.
        flag = True
        print(datapoint["names"])
        print(datapoint["urls"])
        ingredients, quantities = None, None

    return flag, ingredients, quantities

# Volume, in ml, attributed to one whole fruit for each juice; wedges and
# slices are converted as a fixed share of this volume.
_FRUIT_JUICE_ML = {
    'orange juice': 70,
    'lime juice': 30,
    'lemon juice': 45,
    'pineapple juice': 140,
}
_WEDGE_SHARE = 0.15  # consider a wedge to be 0.15*the fruit.
_SLICE_SHARE = 0.1  # consider a slice to be 0.1*the fruit.


def _leading_amount(q):
    """Parse the first space-separated token of ``q`` as a number.

    Accepts plain numbers ('2', '0.5') and fractions written with either the
    Unicode fraction slash (U+2044) or an ASCII slash ('1/2').
    """
    token = q.split(' ')[0].replace('/', '\u2044')
    parts = token.split('\u2044')
    if len(parts) > 1:
        return float(parts[0]) / float(parts[1])
    return float(token)


def _fruit_piece_to_ml(q, ingredient, share):
    """Convert a count of fruit pieces into an 'X ml' quantity string."""
    if ingredient not in _FRUIT_JUICE_ML:
        raise ValueError(f'cannot convert {q!r} of {ingredient!r} to a volume: unknown fruit')
    factor = _leading_amount(q) * share
    return f'{factor * _FRUIT_JUICE_ML[ingredient]:.2f} ml'


def tambouille(q, ingredients_scrubbed, quantities_scrubbed, cat):
    """Rewrite an irregular quantity string into one expressed in a known unit.

    Handles the special cases found in scraped recipes (cubes, wedges, slices,
    pinches, 'top up with', whole eggs, ...). A string matching none of them is
    returned unchanged.

    Args:
        q: raw quantity string of the ingredient being processed.
        ingredients_scrubbed: ingredient names of the recipe; the ingredient
            being processed is the one at index ``len(quantities_scrubbed)``.
        quantities_scrubbed: quantities already resolved for the preceding
            ingredients.
        cat: cocktail category, used as a key into ``volume_ranges`` for the
            'top up with' case.

    Returns:
        The rewritten quantity string, or ``q`` itself.

    Raises:
        ValueError: if a wedge/slice is requested for a fruit without a known
            volume, or if the leading amount cannot be parsed as a number.
    """
    # ugly
    ing_scrubbed = ingredients_scrubbed[len(quantities_scrubbed)]
    if q == '4 cube' and ing_scrubbed == 'pineapple juice':
        q = '20 ml'
    elif 'top up with' in q:
        # Fill up to the middle of the category's volume range, 15 ml at least.
        volume_so_far = np.sum([quantity for ing, quantity in zip(ingredients_scrubbed, quantities_scrubbed) if ing != 'mint'])
        volume_mix = np.sum(volume_ranges[cat]) / 2
        if (volume_mix - volume_so_far) < 15:
            q = '15 ml'
        else:
            q = str(int(volume_mix - volume_so_far)) + ' ml'
    elif q == '1 pinch' and ing_scrubbed == 'salt':
        q = '2 drops'
    elif 'cube' in q and ing_scrubbed == 'double syrup':
        q = f'{float(q.split(" ")[0]) * 2 * 1.7:.2f} ml'  # 2g per cube, 1.7 is ratio solid / syrup
    elif 'wedge' in q:
        q = _fruit_piece_to_ml(q, ing_scrubbed, _WEDGE_SHARE)
    elif 'slice' in q:
        q = _fruit_piece_to_ml(q, ing_scrubbed, _SLICE_SHARE)
    elif q == '1 whole' and ing_scrubbed == 'luxardo maraschino':
        q = '10 ml'
    elif ing_scrubbed == 'egg' and 'ml' not in q:
        q = f'{float(q) * 30:.2f} ml'  # 30 ml per egg
    return q


def compute_eucl_dist(a, b):
    """Return the Euclidean distance between arrays ``a`` and ``b``."""
    return np.sqrt(np.sum((a - b)**2))


def evaluate_with_quadruplets(representations, strategy='all', quadruplets_path=None):
    """Score representations on 'close pair vs. far pairs' comparisons.

    The pickle file holds a dict whose values are sequences
    ``(strategy_name, close, far, ...)``: ``close`` is a pair of indexes into
    ``representations`` and ``far`` a sequence of index pairs. A comparison
    succeeds when the close pair is strictly nearer (Euclidean distance) than
    every far pair. Entries whose ``close`` does not hold exactly two indexes
    are ignored.

    Args:
        representations: indexable collection of arrays.
        strategy: 'all' to use every entry, otherwise only the entries whose
            first element equals this value.
        quadruplets_path: path of the pickle file. Defaults to the
            module-level ``QUADRUPLETS_PATH``.

    Returns:
        The fraction of successful comparisons, or ``np.nan`` when no entry
        was evaluated.
    """
    if quadruplets_path is None:
        # NOTE(review): QUADRUPLETS_PATH is not defined in the visible part of
        # this module -- make sure it is imported or defined before relying on
        # the default.
        quadruplets_path = QUADRUPLETS_PATH
    # SECURITY: pickle.load executes arbitrary code from the file; only use it
    # on files produced by this project.
    with open(quadruplets_path, 'rb') as f:
        data = pickle.load(f)
    entries = list(data.values())

    if strategy == 'all':
        quadruplets = [d[1:] for d in entries]
    else:
        quadruplets = [d[1:] for d in entries if d[0] == strategy]

    scores = []
    for q in quadruplets:
        close = q[0]
        if len(close) != 2:
            continue
        far = q[1]
        distance_close = compute_eucl_dist(representations[close[0]], representations[close[1]])
        distances_far = [compute_eucl_dist(representations[pair[0]], representations[pair[1]]) for pair in far]
        scores.append(distance_close < np.min(distances_far))

    if len(scores) == 0:
        return np.nan
    return np.mean(scores)