import json
import time
from concurrent.futures import ThreadPoolExecutor

import pinecone
import streamlit as st
from sentence_transformers import SentenceTransformer

#pinecone.init(api_key='f5112f8c-f27d-4af1-b427-0c0953c113b5', environment='asia-southeast1-gcp')

pinecone.init(api_key='5c5b5687-b73d-47e9-9cc8-e184ff72cc45', environment='us-central1-gcp')

# Normalise ingredient text before embedding: lowercase and unify "&" with "and".
def process_string(s):
    return s.lower().replace('&', 'and')

# Pinecone index holding the reference ingredient embeddings.
index = pinecone.Index('ingradientsearch')

# Cache the model as a resource so it is loaded once per session; st.cache_resource
# is the Streamlit decorator intended for ML models (st.cache_data would try to
# serialise the returned model on every run).
@st.cache_resource
def load_model():
    return SentenceTransformer(r"finetiuned_model")


# Embed a single ingredient string; encoding one string (not a one-element batch)
# yields the flat list of floats that the Pinecone query API expects as a vector.
def process_embedding(ingredient, model):
    processed_ingredient = process_string(ingredient)
    return model.encode(processed_ingredient).tolist()

# Query the index for the closest reference ingredient; include_metadata is the
# snake_case keyword used by the pinecone-client query API.
def pinecone_query(xq, index, top_k=1, include_metadata=True):
    return index.query(xq, top_k=top_k, include_metadata=include_metadata)

def get_top_matches(ingredients):
    loaded_model = load_model()
    matches = []
    scores = []

    # Generate embeddings in parallel
    with ThreadPoolExecutor() as executor:
        embeddings = list(executor.map(lambda ing: process_embedding(ing, loaded_model), ingredients))

    # Query Pinecone in parallel
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(lambda xq: pinecone_query(xq, index), embeddings))

    # Extract matches and scores
    for result in results:
        if result['matches']:
            match = result['matches'][0]
            matches.append(match['metadata']['Ingredient'])
            scores.append(round(match['score'], 2))

    return matches, scores
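
# Usage sketch (hypothetical ingredient strings; actual matches and scores depend
# on what has been upserted into the 'ingradientsearch' index):
#   matches, scores = get_top_matches(["feta cheese", "extra virgin olive oil"])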


def main():
    st.set_page_config(page_title="Ingredients Matching App", page_icon=":smiley:", layout="wide")
    st.title("Ingredients name matching App :smiley:")

    st.header("Matches using embeddings (semantic search)")
    st.write("Enter the JSON input:")
    json_input = st.text_area("JSON input", label_visibility="collapsed")
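    # Expected payload (assumed from the processing loop below): a JSON array of
    # menu items, each carrying an "ingredients" list of strings, e.g.
    #   [{"name": "Greek Salad", "ingredients": ["feta", "olive oil", "tomato"]}]
    # The "name" field is illustrative only; only "ingredients" is read.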

    if st.button("Process"):
        start_time = time.time()
        with st.spinner("Processing..."):
            try:
                input_data = json.loads(json_input)
                for menu_item in input_data:
                    ingredients = menu_item.get("ingredients", [])
                    matches, scores = get_top_matches(ingredients)
                    menu_item["Ingradients_matched"] = matches
                    menu_item["scores"] = scores
    
                st.write("Processed JSON:")
                #st.write(json.dumps(input_data, indent=2))
                st.write("<pre>" + json.dumps(input_data, indent=4) + "</pre>", unsafe_allow_html=True)
            except json.JSONDecodeError:
                st.error("Invalid JSON input. Please check and try again.")

        end_time = time.time()
        st.write(f"Processing time: {end_time - start_time:.2f} seconds")

if __name__ == "__main__":
    main()
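
# Local run sketch (assumes Streamlit is installed and the fine-tuned model folder
# "finetiuned_model" sits next to this script; adjust the filename as needed):
#   streamlit run this_script.py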