Spaces:
Runtime error
Runtime error
File size: 2,868 Bytes
08eb2b9 6cf1d09 08eb2b9 8354095 08eb2b9 c50d017 08eb2b9 c50d017 08eb2b9 84b651f 08eb2b9 84b651f c50d017 84b651f 08eb2b9 84b651f c9c1394 c50d017 84b651f c50d017 c9c1394 84b651f c50d017 84b651f c50d017 84b651f c50d017 c9c1394 84b651f 08eb2b9 c9c1394 08eb2b9 c50d017 6cf1d09 c50d017 08eb2b9 6cf1d09 08eb2b9 84b651f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import json
import os
import time

import numpy as np
import pandas as pd
import pinecone
import streamlit as st
from sentence_transformers import SentenceTransformer
# Pinecone credentials are read from the environment so that no API key is
# committed to source control. Set PINECONE_API_KEY (and, optionally,
# PINECONE_ENVIRONMENT) in the deployment's secret settings.
# NOTE(review): the keys that were previously hard-coded here have been
# published with the source and should be revoked/rotated.
_pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not _pinecone_api_key:
    raise RuntimeError("PINECONE_API_KEY environment variable is not set.")
pinecone.init(
    api_key=_pinecone_api_key,
    environment=os.environ.get("PINECONE_ENVIRONMENT", "us-central1-gcp"),
)
def process_string(s):
    """Normalize an ingredient name before it is embedded.

    The text is lower-cased and every ``&`` character is replaced with the
    word ``and``.

    Args:
        s: The raw ingredient string.

    Returns:
        The normalized string.
    """
    lowered = s.lower()
    return lowered.replace('&', 'and')
# Module-level handle to the Pinecone index that ingredient queries run against.
_INDEX_NAME = 'ingradientsearch'
index = pinecone.Index(_INDEX_NAME)
@st.cache_resource
def load_model():
    """Load the sentence-embedding model, cached across Streamlit reruns.

    ``st.cache_resource`` is used rather than ``st.cache_data`` because the
    return value is a model object: ``cache_data`` serializes the return
    value and hands back a fresh copy on every call, whereas
    ``cache_resource`` stores the object itself and returns the same
    instance each time.

    Returns:
        The ``SentenceTransformer`` loaded from the ``finetiuned_model`` path.
    """
    return SentenceTransformer(r"finetiuned_model")
from concurrent.futures import ThreadPoolExecutor
def process_embedding(ingredient, model):
    """Embed one ingredient name with *model*.

    The name is normalized with ``process_string`` and encoded as a
    single-element batch.

    Args:
        ingredient: Raw ingredient string.
        model: Object exposing ``encode(list_of_str)`` whose result has a
            ``tolist()`` method.

    Returns:
        ``model.encode([...]).tolist()`` for the normalized name
        (presumably a nested list holding one embedding vector; confirm
        against the model in use).
    """
    normalized = process_string(ingredient)
    encoded_batch = model.encode([normalized])
    return encoded_batch.tolist()
def pinecone_query(xq, index, top_k=1, includeMetadata=True):
    """Run one similarity query against *index* and return the raw response.

    Args:
        xq: The query embedding, passed to ``index.query`` positionally.
        index: Object exposing a ``query`` method (the Pinecone index handle).
        top_k: Number of nearest matches to request.
        includeMetadata: Forwarded unchanged as the ``includeMetadata``
            keyword argument.

    Returns:
        Whatever ``index.query`` returns.
    """
    # NOTE(review): newer Pinecone clients may spell this option
    # ``include_metadata``; confirm against the installed client version.
    query_options = {"top_k": top_k, "includeMetadata": includeMetadata}
    return index.query(xq, **query_options)
def get_top_matches(ingredients):
    """Find the best index match for each ingredient name.

    Every ingredient is embedded with the cached model and sent as a top-1
    query to the module-level ``index``; the lookups run concurrently on a
    thread pool.

    Args:
        ingredients: List of ingredient name strings. ``None`` or an empty
            list returns two empty lists without loading the model.

    Returns:
        A ``(matches, scores)`` tuple of two lists, each holding exactly one
        entry per input ingredient, in input order. ``matches[i]`` is the
        ``'Ingredient'`` metadata value of the best hit and ``scores[i]`` its
        score rounded to two decimals. When a query returns no hits, both
        entries are ``None`` so that positions stay aligned with the input.
    """
    if not ingredients:
        return [], []

    loaded_model = load_model()

    def _lookup(ingredient):
        # Embed and query inside one task so that the network wait of one
        # ingredient overlaps with the encoding of another.
        embedding = process_embedding(ingredient, loaded_model)
        return pinecone_query(embedding, index)

    # executor.map yields results in input order regardless of completion order.
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(_lookup, ingredients))

    matches = []
    scores = []
    for result in results:
        hits = result['matches']
        if hits:
            best = hits[0]
            matches.append(best['metadata']['Ingredient'])
            scores.append(round(best['score'], 2))
        else:
            # Keep a placeholder; silently skipping would shift every later
            # match onto the wrong ingredient.
            matches.append(None)
            scores.append(None)
    return matches, scores
def main():
    """Render the Streamlit page and process a pasted JSON menu on demand.

    The user pastes a JSON array of menu-item objects. When "Process" is
    pressed, each item's ``"ingredients"`` list is passed to
    ``get_top_matches`` and the item is annotated with
    ``"Ingradients_matched"`` and ``"scores"``. The annotated JSON and the
    elapsed processing time are then displayed.
    """
    st.set_page_config(page_title="Ingredients Matching App", page_icon=":smiley:", layout="wide")
    st.title("Ingredients name matching App :smiley:")
    st.header("Matches using embeddings (semantic search)")
    st.write("Enter the JSON input:")
    # Streamlit discourages empty widget labels (accessibility); the label is
    # hidden instead because the prompt is already written just above.
    json_input = st.text_area("JSON input", label_visibility="collapsed")

    if not st.button("Process"):
        return

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    with st.spinner("Processing..."):
        try:
            input_data = json.loads(json_input)
        except json.JSONDecodeError:
            st.error("Invalid JSON input. Please check and try again.")
        else:
            is_menu_list = isinstance(input_data, list) and all(
                isinstance(item, dict) for item in input_data
            )
            if is_menu_list:
                for menu_item in input_data:
                    # `or []` also covers an explicit `"ingredients": null`.
                    ingredients = menu_item.get("ingredients") or []
                    matches, scores = get_top_matches(ingredients)
                    menu_item["Ingradients_matched"] = matches
                    menu_item["scores"] = scores
                st.write("Processed JSON:")
                # Render as a code block rather than raw HTML: the JSON holds
                # user-supplied text, which must not be injected into the page
                # unescaped.
                st.code(json.dumps(input_data, indent=4), language="json")
            else:
                st.error("Invalid input: expected a JSON array of menu item objects.")
    elapsed = time.perf_counter() - start_time
    st.write(f"Processing time: {elapsed:.2f} seconds")
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()
|