diff --git a/plugins/observability/nemo_relay/README.md b/plugins/observability/nemo_relay/README.md index b5376696213..b4c6e34d646 100644 --- a/plugins/observability/nemo_relay/README.md +++ b/plugins/observability/nemo_relay/README.md @@ -173,8 +173,8 @@ include an adaptive component in the same `plugins.toml`: kind = "adaptive" enabled = true -[components.config] -mode = "route" +[components.config.tool_parallelism] +mode = "observe_only" ``` When the adaptive component is enabled and the installed NeMo Relay runtime @@ -182,15 +182,16 @@ exposes `llm.execute(...)` / `tools.execute(...)`, Hermes routes LLM and tool execution through those middleware boundaries. The observer hooks still emit session, turn, approval, and subagent marks; the plugin skips its manual `llm.call` and `tools.call` spans for executions that are already managed by -NeMo Relay. +NeMo Relay. `tool_parallelism.mode = "observe_only"` keeps tool scheduling +observational while still wrapping the real execution boundary. For the full generic Hermes middleware contract, see [`docs/middleware/README.md`](../../../docs/middleware/README.md). ## Canonical Local Examples -The examples below use the official `nemo-relay==0.3` distribution and a local -Ollama model served through the OpenAI-compatible API. +The observe-only examples in this section use the official `nemo-relay==0.3` +distribution and a local Ollama model served through the OpenAI-compatible API. ```bash pip install "nemo-relay==0.3" @@ -404,8 +405,8 @@ version = 1 kind = "adaptive" enabled = true -[components.config] -mode = "route" +[components.config.tool_parallelism] +mode = "observe_only" ``` Enable it for Hermes: @@ -438,11 +439,12 @@ for the same execution. ### Local Adaptive E2E This example enables both NeMo Relay observability export and adaptive execution -middleware for a local Hermes run. +middleware for a local Hermes run. This path requires a NeMo Relay runtime that +supports `[components.config.tool_parallelism]`; the `nemo-relay==0.3` +install used by the earlier observability-only examples does not support this +adaptive config. ```bash -pip install "nemo-relay==0.3" - export HERMES_HOME=/tmp/hermes-middleware-test/hermes-home mkdir -p "$HERMES_HOME" /tmp/hermes-middleware-test/nemo-relay @@ -484,8 +486,8 @@ agent_version = "local" kind = "adaptive" enabled = true -[components.config] -mode = "route" +[components.config.tool_parallelism] +mode = "observe_only" TOML export HERMES_NEMO_RELAY_PLUGINS_TOML=/tmp/hermes-middleware-test/nemo-relay/plugins.toml @@ -510,8 +512,8 @@ middleware_execution_ok Expected ATOF shape: ```jsonl -{"kind":"scope","category":"llm","name":"custom","scope_category":"start","metadata":{"session_id":"middleware-demo-session"},"data":{"mode":"route"}} -{"kind":"scope","category":"tool","name":"terminal","scope_category":"start","metadata":{"session_id":"middleware-demo-session","tool_call_id":"call_terminal"},"data":{"mode":"route"}} +{"kind":"scope","category":"llm","name":"custom","scope_category":"start","metadata":{"session_id":"middleware-demo-session"},"data":{"mode":"observe_only"}} +{"kind":"scope","category":"tool","name":"terminal","scope_category":"start","metadata":{"session_id":"middleware-demo-session","tool_call_id":"call_terminal"},"data":{"mode":"observe_only"}} {"kind":"scope","category":"tool","name":"terminal","scope_category":"end","metadata":{"session_id":"middleware-demo-session","tool_call_id":"call_terminal","status":"ok"},"data":"{\"output\":\"middleware_execution_ok\",\"exit_code\":0,\"error\":null}"} ``` diff --git a/plugins/observability/nemo_relay/__init__.py b/plugins/observability/nemo_relay/__init__.py index cd1587fdab0..fb2d76edcf2 100644 --- a/plugins/observability/nemo_relay/__init__.py +++ b/plugins/observability/nemo_relay/__init__.py @@ -44,7 +44,7 @@ class _Settings: plugins_toml_path: str = "" plugins_config: dict[str, Any] | None = None adaptive_enabled: bool = False - adaptive_mode: str = "observe" + adaptive_mode: str = "observe_only" atof_enabled: bool = False atof_output_directory: str = "" atof_filename: str = "hermes-atof.jsonl" @@ -611,11 +611,16 @@ def _enabled_component_config( def _adaptive_mode(config: dict[str, Any] | None) -> str: if not isinstance(config, dict): - return "observe" + return "observe_only" + tool_parallelism = config.get("tool_parallelism") + if isinstance(tool_parallelism, dict): + mode = tool_parallelism.get("mode") + if isinstance(mode, str) and mode.strip(): + return mode.strip() mode = config.get("mode") if isinstance(mode, str) and mode.strip(): return mode.strip() - return "observe" + return "observe_only" def _env(name: str) -> str: diff --git a/tests/plugins/test_nemo_relay_plugin.py b/tests/plugins/test_nemo_relay_plugin.py index c4970bf2415..ed1e67cbfab 100644 --- a/tests/plugins/test_nemo_relay_plugin.py +++ b/tests/plugins/test_nemo_relay_plugin.py @@ -457,8 +457,8 @@ version = 1 kind = "adaptive" enabled = true -[components.config] -mode = "route" +[components.config.tool_parallelism] +mode = "observe_only" """, encoding="utf-8", ) @@ -506,7 +506,7 @@ mode = "route" assert response.choices == [raw_choice] assert seen_request["intercepted"] is True execute_start = next(event for event in fake.events if event[0] == "llm.execute.start") - assert execute_start[3]["data"]["mode"] == "route" + assert execute_start[3]["data"]["mode"] == "observe_only" execute_end = next(event for event in fake.events if event[0] == "llm.execute.end") assert execute_end[2] == { "model": "demo-model", @@ -527,6 +527,84 @@ mode = "route" } +def _adaptive_llm_execute_mode(tmp_path, monkeypatch, plugins_toml_text: str) -> str: + fake = _FakeNemoRelay() + plugin = _fresh_plugin(monkeypatch, fake) + plugins_toml = tmp_path / "plugins.toml" + plugins_toml.write_text(plugins_toml_text, encoding="utf-8") + monkeypatch.setenv("HERMES_NEMO_RELAY_PLUGINS_TOML", str(plugins_toml)) + + plugin.on_llm_execution_middleware( + session_id="s1", + provider="anthropic", + model="demo-model", + request={"messages": [{"role": "user", "content": "hi"}]}, + next_call=lambda request: {"raw": request}, + ) + + execute_start = next(event for event in fake.events if event[0] == "llm.execute.start") + return execute_start[3]["data"]["mode"] + + +def test_nemo_relay_adaptive_llm_execution_middleware_defaults_to_observe_only_when_mode_is_unset( + tmp_path, monkeypatch +): + mode = _adaptive_llm_execute_mode( + tmp_path, + monkeypatch, + """ +version = 1 + +[[components]] +kind = "adaptive" +enabled = true + +[components.config] +version = 1 +""", + ) + assert mode == "observe_only" + + +def test_nemo_relay_adaptive_llm_execution_middleware_accepts_legacy_top_level_mode(tmp_path, monkeypatch): + mode = _adaptive_llm_execute_mode( + tmp_path, + monkeypatch, + """ +version = 1 + +[[components]] +kind = "adaptive" +enabled = true + +[components.config] +mode = "route" +""", + ) + assert mode == "route" + + +def test_nemo_relay_adaptive_llm_execution_middleware_prefers_tool_parallelism_mode(tmp_path, monkeypatch): + mode = _adaptive_llm_execute_mode( + tmp_path, + monkeypatch, + """ +version = 1 + +[[components]] +kind = "adaptive" +enabled = true + +[components.config] +mode = "route" + +[components.config.tool_parallelism] +mode = "schedule" +""", + ) + assert mode == "schedule" + + def test_nemo_relay_llm_execution_middleware_calls_through_without_adaptive(monkeypatch): fake = _FakeNemoRelay() plugin = _fresh_plugin(monkeypatch, fake) @@ -555,8 +633,8 @@ version = 1 kind = "adaptive" enabled = true -[components.config] -mode = "route" +[components.config.tool_parallelism] +mode = "observe_only" """, encoding="utf-8", ) @@ -582,7 +660,7 @@ mode = "route" assert response == {"raw": True, "args": {"command": "pwd", "intercepted": True}} assert seen_args["intercepted"] is True execute_start = next(event for event in fake.events if event[0] == "tool.execute.start") - assert execute_start[3]["data"]["mode"] == "route" + assert execute_start[3]["data"]["mode"] == "observe_only" assert execute_start[3]["data"]["tool_call_id"] == "tool-1" @@ -613,8 +691,8 @@ version = 1 kind = "adaptive" enabled = true -[components.config] -mode = "route" +[components.config.tool_parallelism] +mode = "observe_only" """, encoding="utf-8", )