mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(nemo-relay): align adaptive config with tool_parallelism mode
Signed-off-by: mnajafian-nv <mnajafian@nvidia.com>
This commit is contained in:
parent
a38003be3d
commit
021d1034d0
3 changed files with 110 additions and 25 deletions
|
|
@ -173,8 +173,8 @@ include an adaptive component in the same `plugins.toml`:
|
|||
kind = "adaptive"
|
||||
enabled = true
|
||||
|
||||
[components.config]
|
||||
mode = "route"
|
||||
[components.config.tool_parallelism]
|
||||
mode = "observe_only"
|
||||
```
|
||||
|
||||
When the adaptive component is enabled and the installed NeMo Relay runtime
|
||||
|
|
@ -182,15 +182,16 @@ exposes `llm.execute(...)` / `tools.execute(...)`, Hermes routes LLM and tool
|
|||
execution through those middleware boundaries. The observer hooks still emit
|
||||
session, turn, approval, and subagent marks; the plugin skips its manual
|
||||
`llm.call` and `tools.call` spans for executions that are already managed by
|
||||
NeMo Relay.
|
||||
NeMo Relay. `tool_parallelism.mode = "observe_only"` keeps tool scheduling
|
||||
observational while still wrapping the real execution boundary.
|
||||
|
||||
For the full generic Hermes middleware contract, see
|
||||
[`docs/middleware/README.md`](../../../docs/middleware/README.md).
|
||||
|
||||
## Canonical Local Examples
|
||||
|
||||
The examples below use the official `nemo-relay==0.3` distribution and a local
|
||||
Ollama model served through the OpenAI-compatible API.
|
||||
The observe-only examples in this section use the official `nemo-relay==0.3`
|
||||
distribution and a local Ollama model served through the OpenAI-compatible API.
|
||||
|
||||
```bash
|
||||
pip install "nemo-relay==0.3"
|
||||
|
|
@ -404,8 +405,8 @@ version = 1
|
|||
kind = "adaptive"
|
||||
enabled = true
|
||||
|
||||
[components.config]
|
||||
mode = "route"
|
||||
[components.config.tool_parallelism]
|
||||
mode = "observe_only"
|
||||
```
|
||||
|
||||
Enable it for Hermes:
|
||||
|
|
@ -438,11 +439,12 @@ for the same execution.
|
|||
### Local Adaptive E2E
|
||||
|
||||
This example enables both NeMo Relay observability export and adaptive execution
|
||||
middleware for a local Hermes run.
|
||||
middleware for a local Hermes run. This path requires a NeMo Relay runtime that
|
||||
supports `[components.config.tool_parallelism]`; the `nemo-relay==0.3`
|
||||
install used by the earlier observability-only examples does not support this
|
||||
adaptive config.
|
||||
|
||||
```bash
|
||||
pip install "nemo-relay==0.3"
|
||||
|
||||
export HERMES_HOME=/tmp/hermes-middleware-test/hermes-home
|
||||
mkdir -p "$HERMES_HOME" /tmp/hermes-middleware-test/nemo-relay
|
||||
|
||||
|
|
@ -484,8 +486,8 @@ agent_version = "local"
|
|||
kind = "adaptive"
|
||||
enabled = true
|
||||
|
||||
[components.config]
|
||||
mode = "route"
|
||||
[components.config.tool_parallelism]
|
||||
mode = "observe_only"
|
||||
TOML
|
||||
|
||||
export HERMES_NEMO_RELAY_PLUGINS_TOML=/tmp/hermes-middleware-test/nemo-relay/plugins.toml
|
||||
|
|
@ -510,8 +512,8 @@ middleware_execution_ok
|
|||
Expected ATOF shape:
|
||||
|
||||
```jsonl
|
||||
{"kind":"scope","category":"llm","name":"custom","scope_category":"start","metadata":{"session_id":"middleware-demo-session"},"data":{"mode":"route"}}
|
||||
{"kind":"scope","category":"tool","name":"terminal","scope_category":"start","metadata":{"session_id":"middleware-demo-session","tool_call_id":"call_terminal"},"data":{"mode":"route"}}
|
||||
{"kind":"scope","category":"llm","name":"custom","scope_category":"start","metadata":{"session_id":"middleware-demo-session"},"data":{"mode":"observe_only"}}
|
||||
{"kind":"scope","category":"tool","name":"terminal","scope_category":"start","metadata":{"session_id":"middleware-demo-session","tool_call_id":"call_terminal"},"data":{"mode":"observe_only"}}
|
||||
{"kind":"scope","category":"tool","name":"terminal","scope_category":"end","metadata":{"session_id":"middleware-demo-session","tool_call_id":"call_terminal","status":"ok"},"data":"{\"output\":\"middleware_execution_ok\",\"exit_code\":0,\"error\":null}"}
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ class _Settings:
|
|||
plugins_toml_path: str = ""
|
||||
plugins_config: dict[str, Any] | None = None
|
||||
adaptive_enabled: bool = False
|
||||
adaptive_mode: str = "observe"
|
||||
adaptive_mode: str = "observe_only"
|
||||
atof_enabled: bool = False
|
||||
atof_output_directory: str = ""
|
||||
atof_filename: str = "hermes-atof.jsonl"
|
||||
|
|
@ -611,11 +611,16 @@ def _enabled_component_config(
|
|||
|
||||
def _adaptive_mode(config: dict[str, Any] | None) -> str:
|
||||
if not isinstance(config, dict):
|
||||
return "observe"
|
||||
return "observe_only"
|
||||
tool_parallelism = config.get("tool_parallelism")
|
||||
if isinstance(tool_parallelism, dict):
|
||||
mode = tool_parallelism.get("mode")
|
||||
if isinstance(mode, str) and mode.strip():
|
||||
return mode.strip()
|
||||
mode = config.get("mode")
|
||||
if isinstance(mode, str) and mode.strip():
|
||||
return mode.strip()
|
||||
return "observe"
|
||||
return "observe_only"
|
||||
|
||||
|
||||
def _env(name: str) -> str:
|
||||
|
|
|
|||
|
|
@ -457,8 +457,8 @@ version = 1
|
|||
kind = "adaptive"
|
||||
enabled = true
|
||||
|
||||
[components.config]
|
||||
mode = "route"
|
||||
[components.config.tool_parallelism]
|
||||
mode = "observe_only"
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
|
@ -506,7 +506,7 @@ mode = "route"
|
|||
assert response.choices == [raw_choice]
|
||||
assert seen_request["intercepted"] is True
|
||||
execute_start = next(event for event in fake.events if event[0] == "llm.execute.start")
|
||||
assert execute_start[3]["data"]["mode"] == "route"
|
||||
assert execute_start[3]["data"]["mode"] == "observe_only"
|
||||
execute_end = next(event for event in fake.events if event[0] == "llm.execute.end")
|
||||
assert execute_end[2] == {
|
||||
"model": "demo-model",
|
||||
|
|
@ -527,6 +527,84 @@ mode = "route"
|
|||
}
|
||||
|
||||
|
||||
def _adaptive_llm_execute_mode(tmp_path, monkeypatch, plugins_toml_text: str) -> str:
|
||||
fake = _FakeNemoRelay()
|
||||
plugin = _fresh_plugin(monkeypatch, fake)
|
||||
plugins_toml = tmp_path / "plugins.toml"
|
||||
plugins_toml.write_text(plugins_toml_text, encoding="utf-8")
|
||||
monkeypatch.setenv("HERMES_NEMO_RELAY_PLUGINS_TOML", str(plugins_toml))
|
||||
|
||||
plugin.on_llm_execution_middleware(
|
||||
session_id="s1",
|
||||
provider="anthropic",
|
||||
model="demo-model",
|
||||
request={"messages": [{"role": "user", "content": "hi"}]},
|
||||
next_call=lambda request: {"raw": request},
|
||||
)
|
||||
|
||||
execute_start = next(event for event in fake.events if event[0] == "llm.execute.start")
|
||||
return execute_start[3]["data"]["mode"]
|
||||
|
||||
|
||||
def test_nemo_relay_adaptive_llm_execution_middleware_defaults_to_observe_only_when_mode_is_unset(
|
||||
tmp_path, monkeypatch
|
||||
):
|
||||
mode = _adaptive_llm_execute_mode(
|
||||
tmp_path,
|
||||
monkeypatch,
|
||||
"""
|
||||
version = 1
|
||||
|
||||
[[components]]
|
||||
kind = "adaptive"
|
||||
enabled = true
|
||||
|
||||
[components.config]
|
||||
version = 1
|
||||
""",
|
||||
)
|
||||
assert mode == "observe_only"
|
||||
|
||||
|
||||
def test_nemo_relay_adaptive_llm_execution_middleware_accepts_legacy_top_level_mode(tmp_path, monkeypatch):
|
||||
mode = _adaptive_llm_execute_mode(
|
||||
tmp_path,
|
||||
monkeypatch,
|
||||
"""
|
||||
version = 1
|
||||
|
||||
[[components]]
|
||||
kind = "adaptive"
|
||||
enabled = true
|
||||
|
||||
[components.config]
|
||||
mode = "route"
|
||||
""",
|
||||
)
|
||||
assert mode == "route"
|
||||
|
||||
|
||||
def test_nemo_relay_adaptive_llm_execution_middleware_prefers_tool_parallelism_mode(tmp_path, monkeypatch):
|
||||
mode = _adaptive_llm_execute_mode(
|
||||
tmp_path,
|
||||
monkeypatch,
|
||||
"""
|
||||
version = 1
|
||||
|
||||
[[components]]
|
||||
kind = "adaptive"
|
||||
enabled = true
|
||||
|
||||
[components.config]
|
||||
mode = "route"
|
||||
|
||||
[components.config.tool_parallelism]
|
||||
mode = "schedule"
|
||||
""",
|
||||
)
|
||||
assert mode == "schedule"
|
||||
|
||||
|
||||
def test_nemo_relay_llm_execution_middleware_calls_through_without_adaptive(monkeypatch):
|
||||
fake = _FakeNemoRelay()
|
||||
plugin = _fresh_plugin(monkeypatch, fake)
|
||||
|
|
@ -555,8 +633,8 @@ version = 1
|
|||
kind = "adaptive"
|
||||
enabled = true
|
||||
|
||||
[components.config]
|
||||
mode = "route"
|
||||
[components.config.tool_parallelism]
|
||||
mode = "observe_only"
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
|
@ -582,7 +660,7 @@ mode = "route"
|
|||
assert response == {"raw": True, "args": {"command": "pwd", "intercepted": True}}
|
||||
assert seen_args["intercepted"] is True
|
||||
execute_start = next(event for event in fake.events if event[0] == "tool.execute.start")
|
||||
assert execute_start[3]["data"]["mode"] == "route"
|
||||
assert execute_start[3]["data"]["mode"] == "observe_only"
|
||||
assert execute_start[3]["data"]["tool_call_id"] == "tool-1"
|
||||
|
||||
|
||||
|
|
@ -613,8 +691,8 @@ version = 1
|
|||
kind = "adaptive"
|
||||
enabled = true
|
||||
|
||||
[components.config]
|
||||
mode = "route"
|
||||
[components.config.tool_parallelism]
|
||||
mode = "observe_only"
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue