amoldwalunj committed on
Commit
fd7fdc2
1 Parent(s): 4d85c55

Create app.py

Files changed (1)
app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
+ import streamlit as st
+ import pandas as pd
+ import json
+ import numpy as np
+ import faiss
+ from sentence_transformers import SentenceTransformer
+ import time
+ from concurrent.futures import ThreadPoolExecutor
+
+ def process_string(s):
+     # Normalize ingredient text before embedding: lowercase and expand '&'.
+     return s.lower().replace('&', 'and')
+
+ #@st.cache
+ @st.cache_resource  # cache_resource keeps one model instance instead of pickling it like cache_data
+ def load_model():
+     return SentenceTransformer(r"finetiuned_model")
+
+ def process_embedding(ingredient, model):
+     processed_ingredient = process_string(ingredient)
+     return model.encode([processed_ingredient]).tolist()
+
+ def faiss_query(xq, index, top_k=1):
+     distances, indices = index.search(np.array(xq).astype('float32'), top_k)
+     return distances[0], indices[0]
+
+ def get_top_matches(ingredients_flat, ingredients, loaded_model, index):
+     matches = []
+     scores = []
+
+     # Generate embeddings in parallel
+     with ThreadPoolExecutor() as executor:
+         embeddings = list(executor.map(lambda ing: process_embedding(ing, loaded_model), ingredients))
+
+     # Query Faiss in parallel
+     with ThreadPoolExecutor() as executor:
+         results = list(executor.map(lambda xq: faiss_query(xq, index), embeddings))
+
+     # Extract matches and scores
+     for distances, indices in results:
+         if indices.size > 0:
+             match = ingredients_flat[indices[0]]
+             matches.append(match)
+             # Map L2 distance to a similarity score (equals cosine similarity when the indexed embeddings are normalized).
+             scores.append(round(1 - distances[0] / 2, 2))
+
+     return matches, scores
+
+ # Load the Faiss index from disk
+ index = faiss.read_index('faiss_index.bin')
+
+ # Load the metadata from the JSON file
+ with open('metadata_faiss.json', 'r') as f:
+     metadata = json.load(f)
+
+ ingredients_flat = [item["Ingredient"] for item in metadata]
+ loaded_model = load_model()
+
+ def main():
+     #st.set_page_config(page_title="Ingredients Matching App", page_icon=":smiley:", layout="wide")
+     st.title("Ingredient Name Matching App :smiley:")
+
+     st.header("Matches using embeddings (semantic search)")
+     st.write("Enter the JSON input:")
+     json_input = st.text_area("JSON input", label_visibility="collapsed")
+
+     if st.button("Process"):
+         start_time = time.time()
+         with st.spinner("Processing..."):
+             try:
+                 input_data = json.loads(json_input)
+
+                 # Match each menu item's ingredient list against the index.
+                 for menu_item in input_data:
+                     ing_list = menu_item.get("ingredients", [])
+                     matches, scores = get_top_matches(ingredients_flat, ing_list, loaded_model, index)
+                     menu_item["Ingredients_matched"] = matches
+                     menu_item["scores"] = scores
+
+                 #st.write("Processed JSON:")
+                 #st.write("<pre>" + json.dumps(input_data, indent=4) + "</pre>", unsafe_allow_html=True)
+                 output_df = pd.DataFrame(input_data)
+                 st.write("Processed Data:")
+                 st.write(output_df)
+
+             except json.JSONDecodeError:
+                 st.error("Invalid JSON input. Please check and try again.")
+
+         end_time = time.time()
+         st.write(f"Processing time: {end_time - start_time:.2f} seconds")
+
+ if __name__ == "__main__":
+     main()
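
For reference, the app expects the pasted JSON to be a list of menu items, each carrying an "ingredients" list; any other fields pass straight through to the output table. A hypothetical example payload (the "menu_item" key and the ingredient names are illustrative, not taken from the commit):

import json

# Hypothetical input for the app's text area; only the "ingredients" key is read,
# everything else is carried through to the output DataFrame unchanged.
example_input = [
    {"menu_item": "Margherita Pizza", "ingredients": ["Mozzarella & Basil", "Tomato Sauce", "Olive Oil"]},
    {"menu_item": "Caesar Salad", "ingredients": ["Romaine Lettuce", "Parmesan", "Croutons"]},
]
print(json.dumps(example_input, indent=2))  # paste the printed JSON into the text area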
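
The app reads faiss_index.bin and metadata_faiss.json from disk, but the commit does not include the script that builds them. A minimal sketch of how they could be produced, assuming an IndexFlatL2 index over L2-normalized embeddings (which is what makes the app's 1 - distance / 2 score behave like cosine similarity) and a hypothetical ingredients_master.csv listing the canonical ingredient names:

import json
import faiss
import pandas as pd
from sentence_transformers import SentenceTransformer

def process_string(s):
    # Same normalization the app applies at query time.
    return s.lower().replace('&', 'and')

model = SentenceTransformer("finetiuned_model")  # same checkpoint the app loads
df = pd.read_csv("ingredients_master.csv")       # hypothetical source of canonical ingredient names
ingredients = df["Ingredient"].dropna().unique().tolist()

# Encode with L2 normalization so squared L2 distance maps onto cosine similarity.
embeddings = model.encode(
    [process_string(i) for i in ingredients],
    normalize_embeddings=True,
).astype("float32")

index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)
faiss.write_index(index, "faiss_index.bin")

# Metadata keyed the way the app reads it: item["Ingredient"].
with open("metadata_faiss.json", "w") as f:
    json.dump([{"Ingredient": name} for name in ingredients], f)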