# amoldwalunj's picture
# Update app.py
# 8354095
import html
import json
import os
import time

import numpy as np
import pandas as pd
import pinecone
import streamlit as st
from sentence_transformers import SentenceTransformer
# Connect to Pinecone at import time. Credentials are read from the
# environment when present so the key can be rotated without a code change.
# NOTE(review): the fallback literals below are credentials committed to
# source control. Rotate the key and delete the fallbacks once
# PINECONE_API_KEY / PINECONE_ENVIRONMENT are configured for the deployment.
pinecone.init(
    api_key=os.environ.get('PINECONE_API_KEY', '5c5b5687-b73d-47e9-9cc8-e184ff72cc45'),
    environment=os.environ.get('PINECONE_ENVIRONMENT', 'us-central1-gcp'),
)
def process_string(s):
    """Normalize text for matching: lowercase it and spell out ampersands.

    Every ``&`` is replaced by the word ``and`` with no spaces added, so
    ``"a&b"`` becomes ``"aandb"`` while ``"a & b"`` becomes ``"a and b"``.
    """
    lowered = s.lower()
    normalized = lowered.replace('&', 'and')
    return normalized
# Name of the Pinecone index that is queried for ingredient matches.
_INDEX_NAME = 'ingradientsearch'
# Module-level index handle, created once at import time and used by
# get_top_matches for every query.
index = pinecone.Index(_INDEX_NAME)
@st.cache_resource
def load_model():
    """Load the sentence-embedding model once and reuse it across reruns.

    ``st.cache_resource`` is used instead of ``st.cache_data`` because the
    return value is a model object, not plain data: ``cache_data`` serializes
    the cached value and hands back a fresh copy on every call, whereas
    ``cache_resource`` stores the object itself and returns the same
    instance each time.

    Returns:
        The ``SentenceTransformer`` loaded from the ``finetiuned_model`` path.
    """
    return SentenceTransformer(r"finetiuned_model")
from concurrent.futures import ThreadPoolExecutor
def process_embedding(ingredient, model):
    """Encode a single ingredient name and return the result as nested lists.

    The name is first normalized with ``process_string``. It is then passed
    to ``model.encode`` as a one-element batch, and the encoder output is
    converted to plain Python lists via ``.tolist()``.
    """
    batch = [process_string(ingredient)]
    encoded = model.encode(batch)
    return encoded.tolist()
def pinecone_query(xq, index, top_k=1, includeMetadata=True):
    """Run one query against ``index`` and return its response unchanged.

    Args:
        xq: Query embedding, passed to ``index.query`` positionally.
        index: Object exposing a ``query`` method.
        top_k: Forwarded as the ``top_k`` keyword.
        includeMetadata: Forwarded as the ``includeMetadata`` keyword.
    """
    query_options = {"top_k": top_k, "includeMetadata": includeMetadata}
    return index.query(xq, **query_options)
def get_top_matches(ingredients):
    """Look up the best index match for each ingredient name.

    Each name is embedded with the cached model and queried against the
    module-level ``index`` (top-1). Embedding and querying are both fanned
    out over a thread pool; ``executor.map`` preserves input order.

    Args:
        ingredients: Sequence of ingredient name strings. May be empty or
            ``None``.

    Returns:
        ``(matches, scores)``: two lists with the same length and order as
        ``ingredients``. ``matches[i]`` is the ``Ingredient`` metadata of the
        top hit and ``scores[i]`` its score rounded to two decimals. When a
        query returns no hit, both entries are ``None`` so the positions stay
        aligned with the input.
    """
    # Nothing to match: skip loading the model and starting worker threads.
    if not ingredients:
        return [], []

    loaded_model = load_model()

    with ThreadPoolExecutor() as executor:
        # Generate embeddings in parallel.
        embeddings = list(
            executor.map(lambda ing: process_embedding(ing, loaded_model), ingredients)
        )
        # Query Pinecone in parallel.
        results = list(executor.map(lambda xq: pinecone_query(xq, index), embeddings))

    matches = []
    scores = []
    for result in results:
        hits = result['matches']
        if hits:
            best = hits[0]
            matches.append(best['metadata']['Ingredient'])
            scores.append(round(best['score'], 2))
        else:
            # Keep a placeholder so index i still refers to ingredients[i].
            matches.append(None)
            scores.append(None)
    return matches, scores
def main():
    """Render the Streamlit page and process the submitted JSON.

    The input is expected to be a JSON array of objects. For each object the
    optional ``"ingredients"`` list is matched with ``get_top_matches`` and
    the object is annotated in place with ``"Ingradients_matched"`` and
    ``"scores"``. The annotated JSON and the elapsed time are then displayed.
    Malformed JSON and JSON of the wrong shape are reported with ``st.error``
    instead of raising.
    """
    st.set_page_config(page_title="Ingredients Matching App", page_icon=":smiley:", layout="wide")
    st.title("Ingredients name matching App :smiley:")
    st.header("Matches using embeddings (semantic search)")
    st.write("Enter the JSON input:")
    json_input = st.text_area("")

    if not st.button("Process"):
        return

    start_time = time.time()
    with st.spinner("Processing..."):
        try:
            input_data = json.loads(json_input)
        except json.JSONDecodeError:
            st.error("Invalid JSON input. Please check and try again.")
        else:
            is_list_of_objects = isinstance(input_data, list) and all(
                isinstance(item, dict) for item in input_data
            )
            if not is_list_of_objects:
                # Valid JSON of the wrong shape would otherwise fail on .get().
                st.error("Invalid input. Expected a JSON array of objects.")
            else:
                for menu_item in input_data:
                    # `or []` also covers an explicit `"ingredients": null`.
                    ingredients = menu_item.get("ingredients") or []
                    matches, scores = get_top_matches(ingredients)
                    menu_item["Ingradients_matched"] = matches
                    menu_item["scores"] = scores
                st.write("Processed JSON:")
                # The JSON echoes user-supplied text and is rendered as raw
                # HTML, so escape it to stop markup in the input being
                # interpreted by the browser.
                rendered = html.escape(json.dumps(input_data, indent=4), quote=False)
                st.write("<pre>" + rendered + "</pre>", unsafe_allow_html=True)
    end_time = time.time()
    st.write(f"Processing time: {end_time - start_time:.2f} seconds")
# Entry point: build the page only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()