import tensorflow as tf
import sentencepiece as spm
import numpy as np

# Paths to the TFLite model and the SentencePiece tokenizer
tflite_model_path = r"C:\Users\dejan\AppData\Local\Google\Chrome SxS\User Data\optimization_guide_model_store\43\E6DC4029A1E4B4C1\EF94C116CBE73994\model.tflite"
spm_model_path = r"C:\Users\dejan\AppData\Local\Google\Chrome SxS\User Data\optimization_guide_model_store\43\E6DC4029A1E4B4C1\EF94C116CBE73994\sentencepiece.model"

# Load the SentencePiece tokenizer model
sp = spm.SentencePieceProcessor()
sp.load(spm_model_path)

# Function to preprocess text input
def preprocess_text(text, sp):
    # Tokenize and convert to token ids
    input_ids = sp.encode(text, out_type=int)
    # Ensure input has the shape the model expects: (1, sequence_length)
    return np.array(input_ids, dtype=np.int32).reshape(1, -1)

# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

# Get input and output details for model inference
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Function to generate an embedding for a given text
def generate_embeddings(text):
    # Preprocess the text input
    input_data = preprocess_text(text, sp)
    # Resize the input tensor to match the tokenized sequence length
    interpreter.resize_tensor_input(input_details[0]['index'], input_data.shape)
    interpreter.allocate_tensors()
    # Set the input tensor with the preprocessed data
    interpreter.set_tensor(input_details[0]['index'], input_data)
    # Run inference
    interpreter.invoke()
    # Extract the embedding from the output tensor
    embedding = interpreter.get_tensor(output_details[0]['index'])
    return embedding

# Example usage
text = "Sample passage for embedding generation"
embedding = generate_embeddings(text)
print("Generated Embedding:", embedding)
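
# A minimal follow-up sketch showing one way to use the output:
# comparing two passages with cosine similarity. This assumes the model
# returns a single dense vector (possibly with a leading batch axis);
# the example sentences here are placeholders, not from the original script.
def cosine_similarity(a, b):
    # Flatten to 1-D in case the output keeps a (1, dim) batch axis
    a, b = a.ravel(), b.ravel()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

emb_a = generate_embeddings("Chrome ships an on-device embedding model")
emb_b = generate_embeddings("The browser includes a local text embedding model")
print("Cosine similarity:", cosine_similarity(emb_a, emb_b))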