mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-02 02:01:47 +00:00
feat: configurable embedding infrastructure — local (fastembed) + API (OpenAI) (#675)
- Add agent/embeddings.py with Embedder protocol, FastEmbedEmbedder, OpenAIEmbedder - Factory function get_embedder() reads provider from config.yaml embeddings section - Lazy initialization — no startup impact, model loaded on first embed call - cosine_similarity() and cosine_similarity_matrix() utility functions included - Add fastembed as optional dependency in pyproject.toml - 30 unit tests, all passing Closes #675
This commit is contained in:
parent
60cba55d82
commit
cc4ead999a
3 changed files with 433 additions and 0 deletions
212
tests/test_embeddings.py
Normal file
212
tests/test_embeddings.py
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
"""Tests for agent/embeddings.py — Embedder protocol, implementations, factory, utilities."""
|
||||
|
||||
import math
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from agent.embeddings import (
|
||||
Embedder,
|
||||
FastEmbedEmbedder,
|
||||
OpenAIEmbedder,
|
||||
get_embedder,
|
||||
cosine_similarity,
|
||||
cosine_similarity_matrix,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# cosine_similarity
|
||||
# =========================================================================
|
||||
|
||||
class TestCosineSimilarity:
|
||||
def test_identical_vectors(self):
|
||||
a = [1.0, 0.0, 0.0]
|
||||
assert cosine_similarity(a, a) == pytest.approx(1.0)
|
||||
|
||||
def test_orthogonal_vectors(self):
|
||||
a = [1.0, 0.0]
|
||||
b = [0.0, 1.0]
|
||||
assert cosine_similarity(a, b) == pytest.approx(0.0)
|
||||
|
||||
def test_opposite_vectors(self):
|
||||
a = [1.0, 0.0]
|
||||
b = [-1.0, 0.0]
|
||||
assert cosine_similarity(a, b) == pytest.approx(-1.0)
|
||||
|
||||
def test_zero_vector_returns_zero(self):
|
||||
a = [0.0, 0.0]
|
||||
b = [1.0, 0.0]
|
||||
assert cosine_similarity(a, b) == 0.0
|
||||
|
||||
def test_dimension_mismatch_raises(self):
|
||||
with pytest.raises(ValueError, match="dimensions must match"):
|
||||
cosine_similarity([1.0, 2.0], [1.0, 2.0, 3.0])
|
||||
|
||||
def test_similar_vectors(self):
|
||||
a = [1.0, 1.0]
|
||||
b = [1.0, 1.1]
|
||||
sim = cosine_similarity(a, b)
|
||||
assert 0.99 < sim < 1.0
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# cosine_similarity_matrix
|
||||
# =========================================================================
|
||||
|
||||
class TestCosineSimilarityMatrix:
|
||||
def test_diagonal_is_one(self):
|
||||
vecs = [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
|
||||
matrix = cosine_similarity_matrix(vecs)
|
||||
for i in range(len(vecs)):
|
||||
assert matrix[i][i] == pytest.approx(1.0)
|
||||
|
||||
def test_symmetry(self):
|
||||
vecs = [[1.0, 0.0], [0.5, 0.5]]
|
||||
matrix = cosine_similarity_matrix(vecs)
|
||||
assert matrix[0][1] == pytest.approx(matrix[1][0])
|
||||
|
||||
def test_orthogonal_off_diagonal(self):
|
||||
vecs = [[1.0, 0.0], [0.0, 1.0]]
|
||||
matrix = cosine_similarity_matrix(vecs)
|
||||
assert matrix[0][1] == pytest.approx(0.0)
|
||||
|
||||
def test_shape(self):
|
||||
vecs = [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
|
||||
matrix = cosine_similarity_matrix(vecs)
|
||||
assert len(matrix) == 3
|
||||
assert all(len(row) == 3 for row in matrix)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# FastEmbedEmbedder
|
||||
# =========================================================================
|
||||
|
||||
class TestFastEmbedEmbedder:
|
||||
def test_default_model(self):
|
||||
emb = FastEmbedEmbedder()
|
||||
assert emb.model_name == FastEmbedEmbedder.DEFAULT_MODEL
|
||||
|
||||
def test_custom_model(self):
|
||||
emb = FastEmbedEmbedder(model="custom-model")
|
||||
assert emb.model_name == "custom-model"
|
||||
|
||||
def test_dimensions(self):
|
||||
emb = FastEmbedEmbedder()
|
||||
assert emb.dimensions == 384
|
||||
|
||||
def test_lazy_load(self):
|
||||
emb = FastEmbedEmbedder()
|
||||
assert emb._model is None
|
||||
|
||||
def test_import_error_if_not_installed(self):
|
||||
emb = FastEmbedEmbedder()
|
||||
with patch.dict("sys.modules", {"fastembed": None}):
|
||||
with pytest.raises(ImportError, match="fastembed is not installed"):
|
||||
emb._load()
|
||||
|
||||
def test_embed_text(self):
|
||||
emb = FastEmbedEmbedder()
|
||||
mock_model = MagicMock()
|
||||
# Use a simple object with .tolist() instead of numpy array
|
||||
fake_vec = MagicMock()
|
||||
fake_vec.tolist.return_value = [0.1, 0.2, 0.3]
|
||||
mock_model.embed.return_value = iter([fake_vec])
|
||||
emb._model = mock_model
|
||||
result = emb.embed_text("hello")
|
||||
assert result == pytest.approx([0.1, 0.2, 0.3])
|
||||
|
||||
def test_embed_texts(self):
|
||||
emb = FastEmbedEmbedder()
|
||||
mock_model = MagicMock()
|
||||
fake_vec1 = MagicMock()
|
||||
fake_vec1.tolist.return_value = [0.1, 0.2]
|
||||
fake_vec2 = MagicMock()
|
||||
fake_vec2.tolist.return_value = [0.3, 0.4]
|
||||
mock_model.embed.return_value = iter([fake_vec1, fake_vec2])
|
||||
emb._model = mock_model
|
||||
result = emb.embed_texts(["hello", "world"])
|
||||
assert len(result) == 2
|
||||
assert result[0] == pytest.approx([0.1, 0.2])
|
||||
assert result[1] == pytest.approx([0.3, 0.4])
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# OpenAIEmbedder
|
||||
# =========================================================================
|
||||
|
||||
class TestOpenAIEmbedder:
|
||||
def test_default_model(self):
|
||||
emb = OpenAIEmbedder()
|
||||
assert emb.model_name == OpenAIEmbedder.DEFAULT_MODEL
|
||||
|
||||
def test_dimensions_known_model(self):
|
||||
assert OpenAIEmbedder(model="text-embedding-3-small").dimensions == 1536
|
||||
assert OpenAIEmbedder(model="text-embedding-3-large").dimensions == 3072
|
||||
|
||||
def test_dimensions_unknown_model(self):
|
||||
assert OpenAIEmbedder(model="unknown-model").dimensions == 1536
|
||||
|
||||
def test_lazy_load(self):
|
||||
emb = OpenAIEmbedder()
|
||||
assert emb._client is None
|
||||
|
||||
def test_embed_text(self):
|
||||
emb = OpenAIEmbedder()
|
||||
mock_client = MagicMock()
|
||||
mock_client.embeddings.create.return_value.data = [
|
||||
MagicMock(embedding=[0.1, 0.2, 0.3])
|
||||
]
|
||||
emb._client = mock_client
|
||||
result = emb.embed_text("hello")
|
||||
assert result == [0.1, 0.2, 0.3]
|
||||
mock_client.embeddings.create.assert_called_once_with(
|
||||
input=["hello"], model=OpenAIEmbedder.DEFAULT_MODEL
|
||||
)
|
||||
|
||||
def test_embed_texts(self):
|
||||
emb = OpenAIEmbedder()
|
||||
mock_client = MagicMock()
|
||||
mock_client.embeddings.create.return_value.data = [
|
||||
MagicMock(embedding=[0.1, 0.2]),
|
||||
MagicMock(embedding=[0.3, 0.4]),
|
||||
]
|
||||
emb._client = mock_client
|
||||
result = emb.embed_texts(["hello", "world"])
|
||||
assert len(result) == 2
|
||||
assert result[0] == [0.1, 0.2]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# get_embedder factory
|
||||
# =========================================================================
|
||||
|
||||
class TestGetEmbedder:
|
||||
def test_default_returns_fastembed(self):
|
||||
emb = get_embedder({})
|
||||
assert isinstance(emb, FastEmbedEmbedder)
|
||||
|
||||
def test_local_provider(self):
|
||||
emb = get_embedder({"embeddings": {"provider": "local"}})
|
||||
assert isinstance(emb, FastEmbedEmbedder)
|
||||
|
||||
def test_local_custom_model(self):
|
||||
emb = get_embedder({"embeddings": {"provider": "local", "model": "custom-model"}})
|
||||
assert isinstance(emb, FastEmbedEmbedder)
|
||||
assert emb.model_name == "custom-model"
|
||||
|
||||
def test_openai_provider(self):
|
||||
emb = get_embedder({"embeddings": {"provider": "openai"}})
|
||||
assert isinstance(emb, OpenAIEmbedder)
|
||||
|
||||
def test_openai_custom_model(self):
|
||||
emb = get_embedder({"embeddings": {"provider": "openai", "model": "text-embedding-3-large"}})
|
||||
assert isinstance(emb, OpenAIEmbedder)
|
||||
assert emb.model_name == "text-embedding-3-large"
|
||||
|
||||
def test_unknown_provider_raises(self):
|
||||
with pytest.raises(ValueError, match="Unknown embedding provider"):
|
||||
get_embedder({"embeddings": {"provider": "unknown"}})
|
||||
|
||||
def test_embedder_protocol_compliance(self):
|
||||
emb = get_embedder({})
|
||||
assert isinstance(emb, Embedder)
|
||||
Loading…
Add table
Add a link
Reference in a new issue