hermes-agent/tests/run_agent/test_flush_memories_codex.py

"""Tests for flush_memories() working correctly across all provider modes.

Catches the bug where Codex mode called chat.completions.create on a
Responses-only client, which would fail silently or with a 404.
"""

import json
import os
import sys
import types
from types import SimpleNamespace
from unittest.mock import patch, MagicMock, call

import pytest

sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())

from hermes_agent.agent import loop as run_agent


class _FakeOpenAI:
    def __init__(self, **kwargs):
        self.kwargs = kwargs
        self.api_key = kwargs.get("api_key", "test")
        self.base_url = kwargs.get("base_url", "http://test")

    def close(self):
        pass


def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
    """Build an AIAgent with mocked internals, ready for flush_memories testing."""
    monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [
        {
            "type": "function",
            "function": {
                "name": "memory",
                "description": "Manage memories.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "action": {"type": "string"},
                        "target": {"type": "string"},
                        "content": {"type": "string"},
                    },
                },
            },
        },
    ])
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)

    agent = run_agent.AIAgent(
        api_key="test-key",
        base_url="https://test.example.com/v1",
        provider=provider,
        api_mode=api_mode,
        max_iterations=4,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
    # Give it a valid memory store
    agent._memory_store = MagicMock()
    agent._memory_flush_min_turns = 1
    agent._user_turn_count = 5
    return agent


def _chat_response_with_memory_call():
    """Simulated chat completions response with a memory tool call."""
    return SimpleNamespace(
        choices=[SimpleNamespace(
            message=SimpleNamespace(
                content=None,
                tool_calls=[SimpleNamespace(
                    function=SimpleNamespace(
                        name="memory",
                        arguments=json.dumps({
                            "action": "add",
                            "target": "notes",
                            "content": "User prefers dark mode.",
                        }),
                    ),
                )],
            ),
        )],
        usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120),
    )


class TestFlushMemoriesRespectsConfigTimeout:
    """flush_memories() must NOT hardcode timeout=30.0 — it should defer
    to the config value via auxiliary.flush_memories.timeout."""

    def test_auxiliary_path_omits_explicit_timeout(self, monkeypatch):
        """When calling _call_llm, timeout should NOT be passed so that
        _get_task_timeout('flush_memories') reads from config."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")

        mock_response = _chat_response_with_memory_call()

        with patch("hermes_agent.providers.auxiliary.call_llm", return_value=mock_response) as mock_call:
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Note this"},
            ]
            with patch("hermes_agent.tools.memory.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        mock_call.assert_called_once()
        call_kwargs = mock_call.call_args
        # timeout must NOT be explicitly passed (so _get_task_timeout resolves it)
        assert "timeout" not in call_kwargs.kwargs, (
            "flush_memories should not pass explicit timeout to _call_llm; "
            "let _get_task_timeout('flush_memories') resolve from config"
        )

    def test_fallback_path_uses_config_timeout(self, monkeypatch):
        """When auxiliary client is unavailable and we fall back to direct
        OpenAI client, timeout should come from _get_task_timeout, not hardcoded."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent.client = MagicMock()
        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()

        custom_timeout = 180.0

        with patch("hermes_agent.providers.auxiliary.call_llm", side_effect=RuntimeError("no provider")), \
             patch("hermes_agent.providers.auxiliary._get_task_timeout", return_value=custom_timeout) as mock_gtt, \
             patch("hermes_agent.tools.memory.memory_tool", return_value="Saved."):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Save this"},
            ]
            agent.flush_memories(messages)

        mock_gtt.assert_called_once_with("flush_memories")
        agent.client.chat.completions.create.assert_called_once()
        call_kwargs = agent.client.chat.completions.create.call_args
        assert call_kwargs.kwargs.get("timeout") == custom_timeout, (
            f"Expected timeout={custom_timeout} from config, got {call_kwargs.kwargs.get('timeout')}"
        )


class TestFlushMemoriesUsesAuxiliaryClient:
    """When an auxiliary client is available, flush_memories should use it
    instead of self.client -- especially critical in Codex mode."""

    def test_flush_uses_auxiliary_when_available(self, monkeypatch):
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")

        mock_response = _chat_response_with_memory_call()

        with patch("hermes_agent.providers.auxiliary.call_llm", return_value=mock_response) as mock_call:
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Remember this"},
            ]
            with patch("hermes_agent.tools.memory.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)

        mock_call.assert_called_once()
        call_kwargs = mock_call.call_args
        assert call_kwargs.kwargs.get("task") == "flush_memories"

    def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
        """Non-Codex mode with no auxiliary falls back to self.client."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent.client = MagicMock()
        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()

        with patch("hermes_agent.providers.auxiliary.call_llm", side_effect=RuntimeError("no provider")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Save this"},
            ]
            with patch("hermes_agent.tools.memory.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        agent.client.chat.completions.create.assert_called_once()

    def test_flush_executes_memory_tool_calls(self, monkeypatch):
        """Verify that memory tool calls from the flush response actually get executed."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")

        mock_response = _chat_response_with_memory_call()

        with patch("hermes_agent.providers.auxiliary.call_llm", return_value=mock_response):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Note this"},
            ]
            with patch("hermes_agent.tools.memory.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)

        mock_memory.assert_called_once()
        call_kwargs = mock_memory.call_args
        assert call_kwargs.kwargs["action"] == "add"
        assert call_kwargs.kwargs["target"] == "notes"
        assert "dark mode" in call_kwargs.kwargs["content"]

    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
        """After flush, the flush prompt and any response should be removed from messages."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")

        mock_response = _chat_response_with_memory_call()

        with patch("hermes_agent.providers.auxiliary.call_llm", return_value=mock_response):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Remember X"},
            ]
            original_len = len(messages)
            with patch("hermes_agent.tools.memory.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        # Messages should not grow from the flush
        assert len(messages) <= original_len
        # No flush sentinel should remain
        for msg in messages:
            assert "_flush_sentinel" not in msg


class TestFlushMemoriesCodexFallback:
    """When no auxiliary client exists and we're in Codex mode, flush should
    use the Codex Responses API path instead of chat.completions."""

    def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")

        codex_response = SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="function_call",
                    call_id="call_1",
                    name="memory",
                    arguments=json.dumps({
                        "action": "add",
                        "target": "notes",
                        "content": "Codex flush test",
                    }),
                ),
            ],
            usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
            status="completed",
            model="gpt-5-codex",
        )

        with patch("hermes_agent.providers.auxiliary.call_llm", side_effect=RuntimeError("no provider")), \
             patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
             patch.object(agent, "_build_api_kwargs") as mock_build, \
             patch("hermes_agent.tools.memory.memory_tool", return_value="Saved.") as mock_memory:
            mock_build.return_value = {
                "model": "gpt-5-codex",
                "instructions": "test",
                "input": [],
                "tools": [],
                "max_output_tokens": 4096,
            }
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Save this"},
            ]
            agent.flush_memories(messages)

        mock_stream.assert_called_once()
        mock_memory.assert_called_once()
        assert mock_memory.call_args.kwargs["content"] == "Codex flush test"