# Spaces status: Runtime error
import functools
import json
import os
import time

import numpy as np
import pandas as pd
import pinecone
import streamlit as st
from sentence_transformers import SentenceTransformer
# Credentials are read from the environment so that no API key is committed to
# source control. PINECONE_API_KEY is required: a missing key fails fast with a
# KeyError at start-up instead of an opaque authentication error later.
# PINECONE_ENVIRONMENT is optional and falls back to "us-central1-gcp".
pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment=os.environ.get("PINECONE_ENVIRONMENT", "us-central1-gcp"),
)
def process_string(s):
    """Normalise a string for matching.

    The text is lower-cased and every ``&`` character is replaced by the
    word ``and`` (no surrounding spaces are inserted).

    Args:
        s: The raw string.

    Returns:
        The normalised string.
    """
    lowered = s.lower()
    return lowered.replace("&", "and")
# Module-level handle to the Pinecone index that get_top_matches queries.
index = pinecone.Index("ingradientsearch")
@functools.lru_cache(maxsize=1)
def load_model():
    """Return the sentence-embedding model, constructing it only once.

    The first call builds a ``SentenceTransformer`` from ``finetiuned_model``;
    every later call returns that same cached instance. Without the cache the
    model would be rebuilt each time ``get_top_matches`` runs, i.e. once per
    menu item.

    Returns:
        The shared ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(r"finetiuned_model")
from concurrent.futures import ThreadPoolExecutor | |
def process_embedding(ingredient, model):
    """Embed a single ingredient name.

    The name is normalised with ``process_string`` and encoded as a
    one-element batch.

    Args:
        ingredient: Raw ingredient name.
        model: Object exposing ``encode(list_of_str)`` whose result has a
            ``tolist()`` method.

    Returns:
        ``model.encode([...]).tolist()`` for the one-element batch —
        presumably a nested list holding a single embedding vector; confirm
        against the model in use.
    """
    batch = [process_string(ingredient)]
    encoded = model.encode(batch)
    return encoded.tolist()
def pinecone_query(xq, index, top_k=1, includeMetadata=True):
    """Run one query against ``index`` and return its response unchanged.

    Args:
        xq: Query vector, passed to ``index.query`` as the first positional
            argument.
        index: Object exposing a ``query`` method.
        top_k: Forwarded as the ``top_k`` keyword.
        includeMetadata: Forwarded as the ``includeMetadata`` keyword.

    Returns:
        Whatever ``index.query`` returns.
    """
    # NOTE(review): the caller in this file passes a nested list ([[...]]) as
    # xq — confirm that this is the shape Index.query expects.
    query_options = {"top_k": top_k, "includeMetadata": includeMetadata}
    return index.query(xq, **query_options)
def get_top_matches(ingredients):
    """Find the best indexed match for each input ingredient.

    Each ingredient is embedded with the model from ``load_model`` and then
    looked up in the module-level ``index``; both stages are fanned out over
    a thread pool.

    Args:
        ingredients: List of ingredient names. ``None`` or an empty list is
            accepted and returns two empty lists without loading the model.

    Returns:
        A ``(matches, scores)`` tuple of two lists with one entry per input
        ingredient, in input order. ``matches[i]`` is the ``Ingredient``
        metadata value of the top hit and ``scores[i]`` is its score rounded
        to two decimals. Both are ``None`` when the query for ingredient
        ``i`` returned no hit, so positions always line up with the input.
    """
    if not ingredients:
        return [], []

    loaded_model = load_model()

    with ThreadPoolExecutor() as executor:
        # Generate embeddings in parallel; executor.map preserves input order.
        embeddings = list(
            executor.map(lambda ing: process_embedding(ing, loaded_model), ingredients)
        )
        # Query Pinecone in parallel, one request per embedding.
        results = list(executor.map(lambda xq: pinecone_query(xq, index), embeddings))

    matches = []
    scores = []
    for result in results:
        hits = result["matches"]
        if hits:
            best = hits[0]
            matches.append(best["metadata"]["Ingredient"])
            scores.append(round(best["score"], 2))
        else:
            # Placeholder keeps both lists aligned with the input; silently
            # skipping would shift every later match onto the wrong ingredient.
            matches.append(None)
            scores.append(None)
    return matches, scores
def main():
    """Render the Streamlit page and annotate submitted menu items.

    The page takes a JSON array of menu-item objects. When "Process" is
    pressed, each object's ``ingredients`` list is passed to
    ``get_top_matches`` and the object gains two keys, ``Ingradients_matched``
    and ``scores``. The annotated JSON and the elapsed time are then shown.

    Input that is not valid JSON, or is valid JSON but not an array of
    objects, is reported with ``st.error`` instead of raising.
    """
    st.set_page_config(page_title="Ingredients Matching App", page_icon=":smiley:", layout="wide")
    st.title("Ingredients name matching App :smiley:")
    st.header("Matches using embeddings (semantic search)")
    st.write("Enter the JSON input:")
    json_input = st.text_area("")

    if not st.button("Process"):
        return

    start_time = time.time()
    with st.spinner("Processing..."):
        # Only the parse can raise JSONDecodeError, so keep the try minimal.
        try:
            input_data = json.loads(json_input)
        except json.JSONDecodeError:
            st.error("Invalid JSON input. Please check and try again.")
        else:
            is_list_of_objects = isinstance(input_data, list) and all(
                isinstance(item, dict) for item in input_data
            )
            if not is_list_of_objects:
                # Valid JSON of the wrong shape would otherwise fail with an
                # AttributeError on .get() below.
                st.error("Invalid JSON input. Expected a list of menu item objects.")
            else:
                for menu_item in input_data:
                    # "or []" also covers an explicit null in the input.
                    ingredients = menu_item.get("ingredients") or []
                    matches, scores = get_top_matches(ingredients)
                    menu_item["Ingradients_matched"] = matches
                    menu_item["scores"] = scores
                st.write("Processed JSON:")
                # st.code shows the text literally; the data comes from the
                # user, so it must not be injected as raw HTML.
                st.code(json.dumps(input_data, indent=4), language="json")
    end_time = time.time()
    st.write(f"Processing time: {end_time - start_time:.2f} seconds")


if __name__ == "__main__":
    main()