Calorie-Calculator / rec_system.py
shrut27's picture
Upload 14 files
48fd95e
import nltk
import string
import ast
import re
import unidecode
import pandas as pd
import streamlit as st
import nltk
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from collections import Counter
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
# Measuring units and quantity words that carry no ingredient information.
# Tokens are lower-cased and split on spaces before lookup, so the upper-case
# and multi-word entries are kept only for fidelity with the original list.
# 'tbl' is listed without its dot because punctuation is stripped before lookup.
_MEASURES = frozenset({
    'teaspoon', 't', 'tsp', 'tablespoon', 'T', 'tbl.', 'tbl', 'tb', 'tbsp.',
    'fluid ounce', 'fl oz', 'gill', 'cup', 'c', 'pint', 'p', 'pt', 'fl pt',
    'quart', 'q', 'qt', 'fl qt', 'gallon', 'g', 'gal', 'ml', 'milliliter',
    'millilitre', 'cc', 'mL', 'l', 'liter', 'litre', 'L', 'dl', 'deciliter',
    'decilitre', 'dL', 'bulb', 'level', 'heaped', 'rounded', 'whole', 'pinch',
    'medium', 'slice', 'pound', 'lb', '#', 'ounce', 'oz', 'mg', 'milligram',
    'milligramme', 'g', 'gram', 'gramme', 'kg', 'kilogram', 'kilogramme', 'x',
    'of', 'mm', 'millimetre', 'millimeter', 'cm', 'centimeter', 'centimetre',
    'm', 'meter', 'metre', 'inch', 'in', 'milli', 'centi', 'deci', 'hecto',
    'kilo',
})

# Common descriptive words that are dropped from every ingredient.
_WORDS_TO_REMOVE = frozenset({
    'fresh', 'oil', 'a', 'red', 'bunch', 'and', 'clove', 'or', 'leaf',
    'chilly', 'chillies', 'large', 'extra', 'sprig', 'ground', 'handful',
    'free', 'small', 'pepper', 'virgin', 'range', 'from', 'dried',
    'sustainable', 'black', 'peeled', 'higher', 'welfare', 'seed', 'for',
    'finely', 'freshly', 'sea', 'quality', 'white', 'ripe', 'few', 'piece',
    'source', 'to', 'organic', 'flat', 'smoked', 'ginger', 'sliced', 'green',
    'picked', 'the', 'stick', 'plain', 'plus', 'mixed', 'mint', 'bay', 'basil',
    'your', 'cumin', 'optional', 'fennel', 'serve', 'mustard', 'unsalted',
    'baby', 'paprika', 'fat', 'ask', 'natural', 'skin', 'roughly', 'into',
    'such', 'cut', 'good', 'brown', 'grated', 'trimmed', 'oregano', 'powder',
    'yellow', 'dusting', 'knob', 'frozen', 'on', 'deseeded', 'low', 'runny',
    'balsamic', 'cooked', 'streaky', 'nutmeg', 'sage', 'rasher', 'zest', 'pin',
    'groundnut', 'breadcrumb', 'turmeric', 'halved', 'grating', 'stalk',
    'light', 'tinned', 'dry', 'soft', 'rocket', 'bone', 'colour', 'washed',
    'skinless', 'leftover', 'splash', 'removed', 'dijon', 'thick', 'big',
    'hot', 'drained', 'sized', 'chestnut', 'watercress', 'fishmonger',
    'english', 'dill', 'caper', 'raw', 'worcestershire', 'flake', 'cider',
    'cayenne', 'tbsp', 'leg', 'pine', 'wild', 'if', 'fine', 'herb', 'almond',
    'shoulder', 'cube', 'dressing', 'with', 'chunk', 'spice', 'thumb', 'garam',
    'new', 'little', 'punnet', 'peppercorn', 'shelled', 'saffron',
    # The original list read 'other''chopped' (missing comma), which Python
    # concatenates into the single, never-matching entry 'otherchopped'.
    'other', 'chopped',
    'salt', 'olive', 'taste', 'can', 'sauce', 'water', 'diced', 'package',
    'italian', 'shredded', 'divided', 'parsley', 'vinegar', 'all', 'purpose',
    'crushed', 'juice', 'more', 'coriander', 'bell', 'needed', 'thinly',
    'boneless', 'half', 'thyme', 'cubed', 'cinnamon', 'cilantro', 'jar',
    'seasoning', 'rosemary', 'extract', 'sweet', 'baking', 'beaten', 'heavy',
    'seeded', 'tin', 'vanilla', 'uncooked', 'crumb', 'style', 'thin', 'nut',
    'coarsely', 'spring', 'chili', 'cornstarch', 'strip', 'cardamom', 'rinsed',
    'honey', 'cherry', 'root', 'quartered', 'head', 'softened', 'container',
    'crumbled', 'frying', 'lean', 'cooking', 'roasted', 'warm', 'whipping',
    'thawed', 'corn', 'pitted', 'sun', 'kosher', 'bite', 'toasted', 'lasagna',
    'split', 'melted', 'degree', 'lengthwise', 'romano', 'packed', 'pod',
    'anchovy', 'rom', 'prepared', 'juiced', 'fluid', 'floret', 'room',
    'active', 'seasoned', 'mix', 'deveined', 'lightly', 'anise', 'thai',
    'size', 'unsweetened', 'torn', 'wedge', 'sour', 'basmati', 'marinara',
    'dark', 'temperature', 'garnish', 'bouillon', 'loaf', 'shell', 'reggiano',
    'canola', 'parmigiano', 'round', 'canned', 'ghee', 'crust', 'long',
    'broken', 'ketchup', 'bulk', 'cleaned', 'condensed', 'sherry', 'provolone',
    'cold', 'soda', 'cottage', 'spray', 'tamarind', 'pecorino', 'shortening',
    'part', 'bottle', 'sodium', 'cocoa', 'grain', 'french', 'roast', 'stem',
    'link', 'firm', 'asafoetida', 'mild', 'dash', 'boiling',
})

# Ingredients are tokenised on single spaces and hyphens.
_TOKEN_SPLIT = re.compile(r' |-')
# Translation table that deletes every ASCII punctuation character.
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def ingredient_parser(ingreds):
    """Reduce raw ingredient strings to a single space-separated keyword string.

    Each ingredient is split on spaces and hyphens. Every token then has its
    punctuation stripped, is discarded unless purely alphabetic, and is
    lower-cased, de-accented and lemmatized. Tokens found in the measure list
    or the common-word list are dropped; the survivors are joined with spaces.

    Args:
        ingreds: Either a list of ingredient strings, or the string form of a
            Python literal (e.g. ``"['2 cups flour', '1 egg']"``), which is
            parsed with ``ast.literal_eval``.

    Returns:
        A single string of the remaining words, in input order. Empty when
        nothing survives the filters.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when a
            non-list ``ingreds`` is not a valid Python literal.
    """
    if isinstance(ingreds, list):
        ingredients = ingreds
    else:
        ingredients = ast.literal_eval(ingreds)

    lemmatizer = WordNetLemmatizer()
    parsed = []
    for ingredient in ingredients:
        kept = []
        for token in _TOKEN_SPLIT.split(ingredient):
            # str.translate returns a new string; the original code discarded
            # that result, so tokens such as "onion," were thrown away whole
            # by the isalpha() check. Stripping happens after the split so
            # hyphens still act as separators.
            word = token.translate(_PUNCT_TABLE)
            # Drop empty tokens and anything containing digits or symbols.
            if not word.isalpha():
                continue
            # Lower-case, remove accents, then lemmatize so the word can be
            # compared against the singular entries in the filter sets.
            word = lemmatizer.lemmatize(unidecode.unidecode(word.lower()))
            if word in _MEASURES or word in _WORDS_TO_REMOVE:
                continue
            kept.append(word)
        if kept:
            parsed.append(' '.join(kept))
    return ' '.join(parsed)
# load in tfidf model and encodings
def scorefunc(ingredients):
    """Score the input ingredients against every stored recipe encoding.

    Loads the pickled encodings and model from ``tfidf_encodings.pkl`` and
    ``tfidf_model.pkl`` in the current working directory, parses the input
    with ``ingredient_parser``, encodes it with the loaded model's
    ``transform`` method, and computes the cosine similarity against each
    element obtained by iterating over the loaded encodings.

    Args:
        ingredients: A list of ingredient strings, the string form of a
            Python list, or free text (e.g. ``"chicken, rice"``).

    Returns:
        A list with one ``cosine_similarity`` result per stored encoding, in
        the iteration order of the loaded encodings.
    """
    # NOTE(security): pickle.load can execute arbitrary code. These files
    # must come from a trusted source and must never be user-supplied.
    with open('tfidf_encodings.pkl', 'rb') as f:
        tfidf_encodings = pickle.load(f)
    with open('tfidf_model.pkl', 'rb') as f:
        tfidf = pickle.load(f)

    try:
        ingredients_parsed = ingredient_parser(ingredients)
    except (ValueError, SyntaxError, TypeError, AttributeError,
            MemoryError, RecursionError):
        # Free text is not a Python literal (or evaluates to something that
        # is not a sequence of strings), so treat the whole input as a single
        # ingredient. Only parse failures are caught: a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        ingredients_parsed = ingredient_parser([ingredients])

    # Encode the parsed query with the pretrained model.
    ingredients_tfidf = tfidf.transform([ingredients_parsed])

    # One similarity result per stored recipe encoding.
    return [
        cosine_similarity(ingredients_tfidf, encoding)
        for encoding in tfidf_encodings
    ]
def _score_to_float(score):
    """Return *score* as a plain float, unwrapping size-1 array-likes."""
    # Calling float() directly on an array with ndim > 0 (such as a 1x1
    # similarity matrix) is deprecated in recent NumPy releases; .item()
    # extracts the single element explicitly.
    item = getattr(score, 'item', None)
    return float(item()) if callable(item) else float(score)


def get_recommendations(N, scores):
    """Build a table of the N best-scoring recipes.

    Reads the recipe dataset from ``JO_parsed.csv`` in the current working
    directory and uses its ``recipe_name``, ``ingredients`` and
    ``recipe_urls`` columns.

    Args:
        N: Maximum number of recommendations to return.
        scores: Sequence with one score per recipe row, in dataset order.
            Each score may be a plain number or a size-1 array.

    Returns:
        A ``pandas.DataFrame`` with columns ``Recipe``, ``Ingredients``,
        ``Score`` (formatted to three decimals, as a string) and ``Url``,
        ordered from highest to lowest score. Ties keep dataset order.
    """
    # load in recipe dataset
    df_recipes = pd.read_csv('JO_parsed.csv')

    # Normalise once so sorting and formatting work on plain floats.
    values = [_score_to_float(score) for score in scores]

    # Row positions of the N highest scores, in descending order.
    top = sorted(range(len(values)), key=values.__getitem__, reverse=True)[:N]

    # Build every row first and create the frame in one go rather than
    # growing it cell by cell. Scores are positional, hence .iloc.
    rows = [
        {
            'Recipe': df_recipes['recipe_name'].iloc[i],
            'Ingredients': df_recipes['ingredients'].iloc[i],
            'Score': "{:.3f}".format(values[i]),
            'Url': df_recipes['recipe_urls'].iloc[i],
        }
        for i in top
    ]
    return pd.DataFrame(rows, columns=['Recipe', 'Ingredients', 'Score', 'Url'])
def app():
    """Render the recipe recommendation page in Streamlit.

    Shows a selector for the number of results (five or one) and a text box
    for ingredients. Once ingredients are entered, scores them with
    ``scorefunc`` and displays the table from ``get_recommendations``.
    """
    st.markdown("*Recipe Recommendation System*")

    mode = st.selectbox(
        "Display the top 5 recommendations or pick a particular recipe",
        ["Show the top picks", "Select a single recipe"],
    )
    # Five results for the "top picks" mode, a single one otherwise.
    N = 5 if mode == "Show the top picks" else 1

    ing = st.text_input("Enter the ingredients you would like to cook with")
    if not ing:
        # Nothing typed yet: render only the input widgets.
        return

    rec = get_recommendations(N, scorefunc(ing))
    st.write("These are some recommendation(s) for you")
    st.write(rec.head(N))