promptsearchengine / app /vectorizer.py
Jokica17's picture
Added backend `app` module and core engine logic:
cd20a25
raw
history blame
813 Bytes
import numpy as np
from typing import Sequence
class Vectorizer:
    """Adapter that turns text prompts into embedding vectors.

    The wrapped model is duck-typed: it only has to provide an
    ``encode(prompts, show_progress_bar=...)`` method, which is the
    SentenceTransformer interface. Other model families (e.g. ones exposing
    ``embed``) would need an adapted call in ``transform``.
    """

    def __init__(self, model) -> None:
        """
        Initialize the vectorizer with a pre-trained embedding model.

        Args:
            model: The pre-trained embedding model to use for transforming
                prompts. It must provide an ``encode`` method that accepts the
                prompts and a ``show_progress_bar`` keyword argument.
        """
        self.model = model

    def transform(
        self,
        prompts: Sequence[str],
        *,
        show_progress_bar: bool = True,
    ) -> np.ndarray:
        """
        Transform texts into numerical vectors using the specified model.

        Args:
            prompts: The sequence of raw corpus prompts.
            show_progress_bar: Forwarded unchanged to the model's ``encode``
                call. Defaults to True, which matches the previous hard-coded
                behaviour; pass False to request no progress output (e.g.
                when embedding a single query).

        Returns:
            Vectorized prompts as a numpy array.
        """
        # Using 'encode' method for SentenceTransformer model; may need
        # updating for other models (e.g. 'embed').
        embeddings = self.model.encode(
            prompts, show_progress_bar=show_progress_bar
        )
        # np.array guarantees an ndarray even if the model returns a list.
        return np.array(embeddings)