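# Page header captured together with the file (not part of the program):
#   uploader: amoldwalunj | commit message: "Create app.py" | commit: fd7fdc2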
import streamlit as st
import pandas as pd
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import time
from concurrent.futures import ThreadPoolExecutor
def process_string(s):
    """Return *s* lower-cased, with every '&' spelled out as 'and'."""
    lowered = s.lower()
    normalised = lowered.replace('&', 'and')
    return normalised
@st.cache_resource
def load_model():
    """Load the fine-tuned sentence-embedding model once and reuse it.

    ``st.cache_resource`` is used instead of ``st.cache_data`` because
    ``cache_data`` serialises the return value and gives every caller a
    fresh copy, which is wasteful (and fragile) for a large model object.
    ``cache_resource`` keeps one shared instance across reruns and sessions.

    Returns:
        The ``SentenceTransformer`` loaded from the local
        ``finetiuned_model`` directory.
    """
    # NOTE(review): the directory name is spelled "finetiuned_model"; it is
    # kept verbatim because it must match the folder shipped with the app.
    return SentenceTransformer(r"finetiuned_model")
def process_embedding(ingredient, model):
    """Encode a single ingredient name and return the embedding as nested lists.

    The name is normalised with ``process_string``, passed to
    ``model.encode`` as a one-element batch, and the result is converted
    with ``.tolist()``.
    """
    batch = [process_string(ingredient)]
    encoded = model.encode(batch)
    return encoded.tolist()
def faiss_query(xq, index, top_k=1):
    """Search *index* for the ``top_k`` nearest neighbours of the query *xq*.

    *xq* is converted to a ``float32`` array before being handed to
    ``index.search``. Only the first row of each returned array is kept.

    Returns:
        ``(distances, indices)`` for the first query row.
    """
    query = np.array(xq).astype('float32')
    all_distances, all_labels = index.search(query, top_k)
    return all_distances[0], all_labels[0]
def get_top_matches(ingredients_flat, ingredients, loaded_model, index):
    """Find the closest known ingredient for each input ingredient.

    Args:
        ingredients_flat: Sequence of known ingredient names; it is indexed
            with the labels returned by the index search.
        ingredients: Iterable of ingredient names to match. ``None`` is
            treated as "nothing to match".
        loaded_model: Embedding model handed to ``process_embedding``.
        index: Search index handed to ``faiss_query``.

    Returns:
        ``(matches, scores)``: two parallel lists. Inputs for which the
        index reports no valid neighbour are skipped, so both lists may be
        shorter than ``ingredients``. Scores are plain Python floats
        rounded to two decimals.
    """
    if ingredients is None:
        return [], []

    # One pool serves both phases; list() forces every embedding to finish
    # before the searches are submitted, and map() preserves input order.
    with ThreadPoolExecutor() as executor:
        embeddings = list(
            executor.map(lambda ing: process_embedding(ing, loaded_model), ingredients)
        )
        results = list(executor.map(lambda xq: faiss_query(xq, index), embeddings))

    matches = []
    scores = []
    for distances, indices in results:
        # FAISS pads missing neighbours with label -1. Used as a list index
        # it would silently select the *last* known ingredient, so treat it
        # as "no match".
        if indices.size == 0 or indices[0] < 0:
            continue
        matches.append(ingredients_flat[indices[0]])
        # Presumably maps a squared-L2 distance between unit-normalised
        # vectors onto cosine similarity -- confirm against how the index
        # was built. float() keeps NumPy scalar types out of the result.
        scores.append(round(1.0 - float(distances[0]) / 2.0, 2))
    return matches, scores
@st.cache_resource
def _load_index(path='faiss_index.bin'):
    """Read the Faiss index from disk once and share it across reruns."""
    return faiss.read_index(path)


@st.cache_data
def _load_metadata(path='metadata_faiss.json'):
    """Read the metadata JSON from disk once per process."""
    # Explicit encoding: the platform default is not guaranteed to be UTF-8.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


# Streamlit re-executes this script on every interaction, so the expensive
# loads go through cached helpers instead of hitting the disk each time.
index = _load_index()
metadata = _load_metadata()
ingredients_flat = [item["Ingredient"] for item in metadata]
loaded_model = load_model()
def _parse_menu_items(raw_json):
    """Decode *raw_json* and return it as a list of menu-item dicts.

    A single JSON object is accepted and wrapped in a one-element list.

    Raises:
        json.JSONDecodeError: if *raw_json* is not valid JSON.
        ValueError: if the decoded value is neither an object nor a list
            of objects.
    """
    data = json.loads(raw_json)
    if isinstance(data, dict):
        data = [data]
    if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
        raise ValueError("expected a JSON object or a list of JSON objects")
    return data


def main():
    """Render the page: read menu items as JSON, match ingredients, show results.

    Each menu item's ``"ingredients"`` list is matched with
    ``get_top_matches``; the matches and scores are written back onto the
    item under ``"Ingradients_matched"`` and ``"scores"`` and the items are
    displayed as a table, followed by the elapsed processing time.
    """
    st.title("Ingredients name matching App :smiley:")
    st.header("Matches using embeddings (semantic search)")
    st.write("Enter the JSON input:")
    # Streamlit warns about empty widget labels; supply one and hide it,
    # since the prompt is already written out above.
    json_input = st.text_area("JSON input", label_visibility="collapsed")

    if not st.button("Process"):
        return

    start_time = time.perf_counter()
    with st.spinner("Processing..."):
        try:
            input_data = _parse_menu_items(json_input)
        except json.JSONDecodeError:
            st.error("Invalid JSON input. Please check and try again.")
        except ValueError:
            # Valid JSON, but not a menu item or a list of menu items.
            st.error("Invalid JSON input. Expected a list of menu item objects.")
        else:
            for menu_item in input_data:
                # `or []` also covers an explicit `"ingredients": null`.
                ing_list = menu_item.get("ingredients") or []
                if isinstance(ing_list, str):
                    # A bare string would otherwise be matched character by character.
                    ing_list = [ing_list]
                matches, scores = get_top_matches(ingredients_flat, ing_list, loaded_model, index)
                # Key spelling kept as-is: it is part of the displayed output schema.
                menu_item["Ingradients_matched"] = matches
                menu_item["scores"] = scores
            st.write("Processed Data:")
            st.write(pd.DataFrame(input_data))
    elapsed = time.perf_counter() - start_time
    st.write(f"Processing time: {elapsed:.2f} seconds")
# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()