File size: 813 Bytes
cd20a25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import numpy as np
from typing import Sequence


class Vectorizer:
    """Thin adapter that turns text prompts into a numpy array via an embedding model."""

    def __init__(self, model) -> None:
        """
        Store the embedding model used by later `transform` calls.
        Args:
            model: Object exposing an `encode(prompts, show_progress_bar=...)` method.
        """
        self.model = model

    def transform(self, prompts: Sequence[str]) -> np.ndarray:
        """
        Encode the given prompts with the stored model.
        Args:
            prompts: The raw prompts, forwarded unchanged to the model.
        Returns:
            The model's output for the prompts, wrapped in a numpy array.
        """
        # NOTE: 'encode' is the SentenceTransformer entry point; a different
        # backend may expose another method name (e.g. 'embed').
        embedder = self.model
        embeddings = embedder.encode(prompts, show_progress_bar=True)
        return np.array(embeddings)