# Page header residue from the hosting site ("Spaces: Sleeping"); not part of the test module.
from unittest.mock import MagicMock, patch

import numpy as np
import pytest

from app.engine import PromptSearchEngine
@pytest.fixture
def mock_prompts():
    """Provide the three-prompt corpus shared by the tests in this module.

    Registered as a pytest fixture because the test functions request it by
    parameter name.
    """
    return ["prompt 1", "prompt 2", "prompt 3"]
@pytest.fixture
def mock_model():
    """Provide a stand-in embedding model for the tests in this module.

    The returned ``MagicMock`` exposes an ``encode`` method that always
    returns a random ``(3, 384)`` array, regardless of its arguments.
    Registered as a pytest fixture because the test functions request it by
    parameter name.
    """
    # 384 matches the (3, 384) shape asserted in test_engine_initialization.
    embedding_dim = 384
    model = MagicMock()
    model.encode = MagicMock(return_value=np.random.rand(3, embedding_dim))
    return model
def test_engine_initialization(mock_prompts, mock_model):
    """Check that the engine stores its prompts and exposes (3, 384) corpus vectors.

    The class-level ``vectorizer`` is replaced only for the duration of this
    test. ``patch.object`` restores (or removes) the attribute on exit, so the
    mock cannot leak into tests that run afterwards.
    """
    corpus_embeddings = mock_model.encode(mock_prompts)
    fake_vectorizer = MagicMock()
    fake_vectorizer.transform = MagicMock(return_value=corpus_embeddings)

    # create=True mirrors the original plain assignment, which succeeds whether
    # or not the class already defines ``vectorizer``.
    with patch.object(
        PromptSearchEngine, "vectorizer", fake_vectorizer, create=True
    ):
        engine = PromptSearchEngine(mock_prompts)

        assert engine.prompts == mock_prompts
        assert engine.corpus_vectors.shape == (3, 384)
def test_most_similar_valid_query(mock_prompts, mock_model):
    """Check that ``most_similar`` returns ``n`` (score, prompt) pairs for a valid query.

    Two stubs are used:

    * a class-level ``vectorizer`` whose ``transform`` returns one embedding
      row per prompt, active only while the engine is built and queried;
    * an instance-level ``vectorizer`` whose ``transform`` returns the single
      query embedding.

    The class-level patch is undone on exit so it cannot affect other tests.
    """
    embedding_dim = 384
    # One row per prompt, so the stubbed corpus agrees with the 3-prompt fixture.
    corpus_embeddings = mock_model.encode(mock_prompts)
    query_embedding = np.random.rand(1, embedding_dim)

    class_vectorizer = MagicMock()
    class_vectorizer.transform = MagicMock(return_value=corpus_embeddings)

    with patch.object(
        PromptSearchEngine, "vectorizer", class_vectorizer, create=True
    ):
        engine = PromptSearchEngine(mock_prompts)

        # From here on the engine instance sees only the query stub.
        engine.vectorizer = MagicMock()
        engine.vectorizer.transform = MagicMock(return_value=query_embedding)

        results = engine.most_similar("test query", n=2)

    assert len(results) == 2
    assert all(
        isinstance(score, float) and isinstance(prompt, str)
        for score, prompt in results
    )
def test_most_similar_empty_query(mock_prompts):
    """Check that a ``ValueError`` raised by the vectorizer surfaces from ``most_similar``.

    The engine is built first; afterwards its ``vectorizer`` is swapped for a
    mock whose ``transform`` always raises ``ValueError``.
    """
    engine = PromptSearchEngine(mock_prompts)

    failing_vectorizer = MagicMock()
    failing_vectorizer.transform = MagicMock(
        side_effect=ValueError("Invalid query")
    )
    engine.vectorizer = failing_vectorizer

    with pytest.raises(ValueError):
        engine.most_similar("", n=2)
def test_most_similar_exceeding_n(mock_prompts, mock_model):
    """Check that asking for more results than prompts returns every prompt once.

    The class-level ``vectorizer`` is stubbed with a ``transform`` that
    returns real arrays of a plausible shape. An unconfigured ``MagicMock``
    would return ``MagicMock`` objects instead of arrays. The patch is scoped
    to this test so it cannot leak into others.

    ``mock_model`` stays in the signature for compatibility; it is not used.
    """
    embedding_dim = 384

    def fake_transform(texts, *args, **kwargs):
        # One row for a bare string, otherwise one row per item.
        # NOTE(review): assumes the engine expects 2-D embeddings - confirm
        # against app.engine.
        row_count = 1 if isinstance(texts, str) else len(texts)
        return np.random.rand(row_count, embedding_dim)

    fake_vectorizer = MagicMock()
    fake_vectorizer.transform = MagicMock(side_effect=fake_transform)

    with patch.object(
        PromptSearchEngine, "vectorizer", fake_vectorizer, create=True
    ):
        engine = PromptSearchEngine(mock_prompts)
        # Ask for more results than there are prompts.
        results = engine.most_similar("test query", n=10)

    # At most one result per prompt can come back.
    assert len(results) == len(mock_prompts)
def test_most_similar_integration(mock_prompts):
    """Run the engine without any mocks and check the top hit for an exact-match query.

    Querying with the text of an existing prompt should rank that prompt
    first, and every result should be a ``(float, str)`` pair.
    """
    query = "prompt 1"
    engine = PromptSearchEngine(mock_prompts)

    results = engine.most_similar(query, n=2)

    assert len(results) == 2
    assert all(
        isinstance(score, float) and isinstance(prompt, str)
        for score, prompt in results
    )
    best_match = results[0]
    assert best_match[1] == "prompt 1"