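# Generate text embeddings with the TFLite embedding model found in Chrome's
# optimization_guide_model_store, using its bundled SentencePiece tokenizer.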
import tensorflow as tf
import sentencepiece as spm
import numpy as np
# Paths to the model and tokenizer
tflite_model_path = r"C:\Users\dejan\AppData\Local\Google\Chrome SxS\User Data\optimization_guide_model_store\43\E6DC4029A1E4B4C1\EF94C116CBE73994\model.tflite"
spm_model_path = r"C:\Users\dejan\AppData\Local\Google\Chrome SxS\User Data\optimization_guide_model_store\43\E6DC4029A1E4B4C1\EF94C116CBE73994\sentencepiece.model"
# Load the SentencePiece tokenizer model
sp = spm.SentencePieceProcessor()
sp.load(spm_model_path)
# Function to preprocess text input
def preprocess_text(text, sp):
    # Tokenize and convert to ids
    input_ids = sp.encode(text, out_type=int)
    # Ensure input is the correct shape for the model (a batch of one sequence)
    return np.array(input_ids, dtype=np.int32).reshape(1, -1)
# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()
# Get input and output details for model inference
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Function to generate embeddings for a given text
def generate_embeddings(text):
    # Preprocess text input
    input_data = preprocess_text(text, sp)
    # Adjust input tensor size if necessary, then reallocate tensors
    interpreter.resize_tensor_input(input_details[0]['index'], input_data.shape)
    interpreter.allocate_tensors()
    # Set the input tensor with preprocessed data
    interpreter.set_tensor(input_details[0]['index'], input_data)
    # Run inference
    interpreter.invoke()
    # Extract the embedding output
    embedding = interpreter.get_tensor(output_details[0]['index'])
    return embedding
# Example usage
text = "Sample passage for embedding generation"
embedding = generate_embeddings(text)
print("Generated Embedding:", embedding)
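# Illustrative follow-up (not part of the original script): once the model
# returns a fixed-size vector per passage, two passages can be compared with
# cosine similarity. This sketch assumes the output shape is (1, dim), as the
# reshape and get_tensor calls above imply.
def cosine_similarity(a, b):
    # Flatten the (1, dim) outputs and compute the normalized dot product
    a, b = a.flatten(), b.flatten()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

emb_a = generate_embeddings("Chrome stores on-device models locally.")
emb_b = generate_embeddings("The browser keeps ML models on disk.")
print("Cosine similarity:", cosine_similarity(emb_a, emb_b))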