hermes-agent/tests/test_embeddings.py
teyrebaz33 cc4ead999a feat: configurable embedding infrastructure — local (fastembed) + API (OpenAI) (#675)
- Add agent/embeddings.py with Embedder protocol, FastEmbedEmbedder, OpenAIEmbedder
- Factory function get_embedder() reads provider from config.yaml embeddings section
- Lazy initialization — no startup impact, model loaded on first embed call
- cosine_similarity() and cosine_similarity_matrix() utility functions included
- Add fastembed as optional dependency in pyproject.toml
- 30 unit tests, all passing

Closes #675
2026-03-10 06:56:18 -07:00

212 lines
7.2 KiB
Python

"""Tests for agent/embeddings.py — Embedder protocol, implementations, factory, utilities."""
import math
import pytest
from unittest.mock import MagicMock, patch
from agent.embeddings import (
Embedder,
FastEmbedEmbedder,
OpenAIEmbedder,
get_embedder,
cosine_similarity,
cosine_similarity_matrix,
)
# =========================================================================
# cosine_similarity
# =========================================================================
class TestCosineSimilarity:
def test_identical_vectors(self):
a = [1.0, 0.0, 0.0]
assert cosine_similarity(a, a) == pytest.approx(1.0)
def test_orthogonal_vectors(self):
a = [1.0, 0.0]
b = [0.0, 1.0]
assert cosine_similarity(a, b) == pytest.approx(0.0)
def test_opposite_vectors(self):
a = [1.0, 0.0]
b = [-1.0, 0.0]
assert cosine_similarity(a, b) == pytest.approx(-1.0)
def test_zero_vector_returns_zero(self):
a = [0.0, 0.0]
b = [1.0, 0.0]
assert cosine_similarity(a, b) == 0.0
def test_dimension_mismatch_raises(self):
with pytest.raises(ValueError, match="dimensions must match"):
cosine_similarity([1.0, 2.0], [1.0, 2.0, 3.0])
def test_similar_vectors(self):
a = [1.0, 1.0]
b = [1.0, 1.1]
sim = cosine_similarity(a, b)
assert 0.99 < sim < 1.0
# =========================================================================
# cosine_similarity_matrix
# =========================================================================
class TestCosineSimilarityMatrix:
def test_diagonal_is_one(self):
vecs = [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
matrix = cosine_similarity_matrix(vecs)
for i in range(len(vecs)):
assert matrix[i][i] == pytest.approx(1.0)
def test_symmetry(self):
vecs = [[1.0, 0.0], [0.5, 0.5]]
matrix = cosine_similarity_matrix(vecs)
assert matrix[0][1] == pytest.approx(matrix[1][0])
def test_orthogonal_off_diagonal(self):
vecs = [[1.0, 0.0], [0.0, 1.0]]
matrix = cosine_similarity_matrix(vecs)
assert matrix[0][1] == pytest.approx(0.0)
def test_shape(self):
vecs = [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
matrix = cosine_similarity_matrix(vecs)
assert len(matrix) == 3
assert all(len(row) == 3 for row in matrix)
# =========================================================================
# FastEmbedEmbedder
# =========================================================================
class TestFastEmbedEmbedder:
def test_default_model(self):
emb = FastEmbedEmbedder()
assert emb.model_name == FastEmbedEmbedder.DEFAULT_MODEL
def test_custom_model(self):
emb = FastEmbedEmbedder(model="custom-model")
assert emb.model_name == "custom-model"
def test_dimensions(self):
emb = FastEmbedEmbedder()
assert emb.dimensions == 384
def test_lazy_load(self):
emb = FastEmbedEmbedder()
assert emb._model is None
def test_import_error_if_not_installed(self):
emb = FastEmbedEmbedder()
with patch.dict("sys.modules", {"fastembed": None}):
with pytest.raises(ImportError, match="fastembed is not installed"):
emb._load()
def test_embed_text(self):
emb = FastEmbedEmbedder()
mock_model = MagicMock()
# Use a simple object with .tolist() instead of numpy array
fake_vec = MagicMock()
fake_vec.tolist.return_value = [0.1, 0.2, 0.3]
mock_model.embed.return_value = iter([fake_vec])
emb._model = mock_model
result = emb.embed_text("hello")
assert result == pytest.approx([0.1, 0.2, 0.3])
def test_embed_texts(self):
emb = FastEmbedEmbedder()
mock_model = MagicMock()
fake_vec1 = MagicMock()
fake_vec1.tolist.return_value = [0.1, 0.2]
fake_vec2 = MagicMock()
fake_vec2.tolist.return_value = [0.3, 0.4]
mock_model.embed.return_value = iter([fake_vec1, fake_vec2])
emb._model = mock_model
result = emb.embed_texts(["hello", "world"])
assert len(result) == 2
assert result[0] == pytest.approx([0.1, 0.2])
assert result[1] == pytest.approx([0.3, 0.4])
# =========================================================================
# OpenAIEmbedder
# =========================================================================
class TestOpenAIEmbedder:
def test_default_model(self):
emb = OpenAIEmbedder()
assert emb.model_name == OpenAIEmbedder.DEFAULT_MODEL
def test_dimensions_known_model(self):
assert OpenAIEmbedder(model="text-embedding-3-small").dimensions == 1536
assert OpenAIEmbedder(model="text-embedding-3-large").dimensions == 3072
def test_dimensions_unknown_model(self):
assert OpenAIEmbedder(model="unknown-model").dimensions == 1536
def test_lazy_load(self):
emb = OpenAIEmbedder()
assert emb._client is None
def test_embed_text(self):
emb = OpenAIEmbedder()
mock_client = MagicMock()
mock_client.embeddings.create.return_value.data = [
MagicMock(embedding=[0.1, 0.2, 0.3])
]
emb._client = mock_client
result = emb.embed_text("hello")
assert result == [0.1, 0.2, 0.3]
mock_client.embeddings.create.assert_called_once_with(
input=["hello"], model=OpenAIEmbedder.DEFAULT_MODEL
)
def test_embed_texts(self):
emb = OpenAIEmbedder()
mock_client = MagicMock()
mock_client.embeddings.create.return_value.data = [
MagicMock(embedding=[0.1, 0.2]),
MagicMock(embedding=[0.3, 0.4]),
]
emb._client = mock_client
result = emb.embed_texts(["hello", "world"])
assert len(result) == 2
assert result[0] == [0.1, 0.2]
# =========================================================================
# get_embedder factory
# =========================================================================
class TestGetEmbedder:
def test_default_returns_fastembed(self):
emb = get_embedder({})
assert isinstance(emb, FastEmbedEmbedder)
def test_local_provider(self):
emb = get_embedder({"embeddings": {"provider": "local"}})
assert isinstance(emb, FastEmbedEmbedder)
def test_local_custom_model(self):
emb = get_embedder({"embeddings": {"provider": "local", "model": "custom-model"}})
assert isinstance(emb, FastEmbedEmbedder)
assert emb.model_name == "custom-model"
def test_openai_provider(self):
emb = get_embedder({"embeddings": {"provider": "openai"}})
assert isinstance(emb, OpenAIEmbedder)
def test_openai_custom_model(self):
emb = get_embedder({"embeddings": {"provider": "openai", "model": "text-embedding-3-large"}})
assert isinstance(emb, OpenAIEmbedder)
assert emb.model_name == "text-embedding-3-large"
def test_unknown_provider_raises(self):
with pytest.raises(ValueError, match="Unknown embedding provider"):
get_embedder({"embeddings": {"provider": "unknown"}})
def test_embedder_protocol_compliance(self):
emb = get_embedder({})
assert isinstance(emb, Embedder)