"""Streamlit app that matches ingredient names against a FAISS index.

The user pastes a JSON payload of menu items. Every entry in each item's
"ingredients" list is embedded with a SentenceTransformer model and looked
up in a FAISS index; the nearest stored ingredient and a score are written
back onto the item and the result is shown as a table.
"""

import json
import time
from concurrent.futures import ThreadPoolExecutor  # noqa: F401  (pre-existing import, kept)

import faiss
import numpy as np
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer


def process_string(s):
    """Return *s* lower-cased with every '&' replaced by 'and'."""
    return s.lower().replace('&', 'and')


# st.cache_resource (not st.cache_data) is the Streamlit cache meant for
# models: the object is shared between reruns instead of being
# pickled and copied on every access.
@st.cache_resource
def load_model():
    """Load the SentenceTransformer model once per server process."""
    return SentenceTransformer(r"finetiuned_model")


@st.cache_resource
def _load_index():
    """Read the FAISS index from disk once per server process."""
    return faiss.read_index('faiss_index.bin')


@st.cache_data
def _load_metadata():
    """Read the metadata JSON (a list of records with an "Ingredient" key)."""
    with open('metadata_faiss.json', 'r', encoding='utf-8') as f:
        return json.load(f)


def process_embedding(ingredient, model):
    """Embed a single ingredient name.

    Args:
        ingredient: Raw ingredient string; normalised with process_string.
        model: Object exposing ``encode(list_of_str)``.

    Returns:
        The result of ``model.encode([...])`` converted with ``.tolist()``,
        i.e. a nested list holding one embedding vector.
    """
    processed_ingredient = process_string(ingredient)
    return model.encode([processed_ingredient]).tolist()


def faiss_query(xq, index, top_k=1):
    """Search *index* for the neighbours of the query batch *xq*.

    Args:
        xq: Query vectors (array-like, one row per query); cast to float32
            because that is what is passed to ``index.search``.
        index: Object exposing ``search(vectors, k)``.
        top_k: Number of neighbours requested per query.

    Returns:
        ``(distances, indices)`` for the FIRST query row only.
    """
    distances, indices = index.search(np.asarray(xq, dtype='float32'), top_k)
    return distances[0], indices[0]


def get_top_matches(ingredients_flat, ingredients, loaded_model, index):
    """Find the closest known ingredient for every name in *ingredients*.

    Args:
        ingredients_flat: Known ingredient names, positionally aligned with
            the vectors stored in *index*.
        ingredients: Iterable of raw ingredient strings to look up.
        loaded_model: Object exposing ``encode(list_of_str)``.
        index: Object exposing ``search(vectors, k)``.

    Returns:
        ``(matches, scores)``: two lists with exactly one entry per input
        ingredient, in input order. When the index yields no usable
        neighbour for an ingredient, both entries are ``None`` so the lists
        stay aligned with the input.
    """
    ingredients = list(ingredients)
    if not ingredients:
        return [], []

    # One batched encode and one batched search replace a thread pool of
    # single-item calls.
    queries = [process_string(ingredient) for ingredient in ingredients]
    embeddings = np.ascontiguousarray(
        loaded_model.encode(queries), dtype='float32'
    )
    distances, indices = index.search(embeddings, 1)

    matches = []
    scores = []
    known_count = len(ingredients_flat)
    for row_distances, row_indices in zip(distances, indices):
        position = int(row_indices[0])
        # FAISS reports a missing neighbour as id -1; indexing with it would
        # silently return the LAST known ingredient. Also guards against an
        # index/metadata size mismatch.
        if 0 <= position < known_count:
            matches.append(ingredients_flat[position])
            # NOTE(review): 1 - d/2 presumably maps a squared-L2 distance
            # between unit-normalised vectors to cosine similarity; confirm
            # against the code that built the index.
            scores.append(round(1.0 - float(row_distances[0]) / 2.0, 2))
        else:
            matches.append(None)
            scores.append(None)
    return matches, scores


def _as_menu_items(payload):
    """Validate the decoded JSON payload and return it as a list of dicts.

    A single JSON object is accepted and wrapped in a list.

    Raises:
        ValueError: If the payload is not an object / list of objects, or an
            item's "ingredients" value is not a list of strings.
    """
    if isinstance(payload, dict):
        payload = [payload]
    if not isinstance(payload, list) or not all(
        isinstance(item, dict) for item in payload
    ):
        raise ValueError(
            "Expected a JSON list of menu items (objects with an "
            "\"ingredients\" list)."
        )
    for item in payload:
        ing_list = item.get("ingredients", [])
        # A bare string would otherwise be iterated character by character.
        if not isinstance(ing_list, list) or not all(
            isinstance(ing, str) for ing in ing_list
        ):
            raise ValueError(
                "Each menu item's \"ingredients\" must be a list of strings."
            )
    return payload


# Streamlit re-executes this script on every interaction; the cached loaders
# keep the index, metadata and model from being re-read each time.
index = _load_index()
metadata = _load_metadata()
ingredients_flat = [item["Ingredient"] for item in metadata]
loaded_model = load_model()


def main():
    """Render the page and process the pasted JSON when requested."""
    st.title("Ingredients name matching App :smiley:")
    st.header("Matches using embeddings (semantic search)")
    st.write("Enter the JSON input:")
    # Widgets need a non-empty label; it is hidden because the prompt is
    # already rendered above.
    json_input = st.text_area("JSON input", label_visibility="collapsed")

    if st.button("Process"):
        start_time = time.time()
        with st.spinner("Processing..."):
            try:
                input_data = _as_menu_items(json.loads(json_input))
                for menu_item in input_data:
                    ing_list = menu_item.get("ingredients", [])
                    matches, scores = get_top_matches(
                        ingredients_flat, ing_list, loaded_model, index
                    )
                    menu_item["Ingradients_matched"] = matches
                    menu_item["scores"] = scores
            # JSONDecodeError subclasses ValueError, so it must come first.
            except json.JSONDecodeError:
                st.error("Invalid JSON input. Please check and try again.")
            except ValueError as exc:
                st.error(str(exc))
            else:
                output_df = pd.DataFrame(input_data)
                st.write("Processed Data:")
                st.write(output_df)
        end_time = time.time()
        st.write(f"Processing time: {end_time - start_time:.2f} seconds")


if __name__ == "__main__":
    main()