mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(hooks): add duration_ms to post_tool_call + transform_tool_result
Plugin hooks fired after a tool dispatch now receive an integer
duration_ms kwarg measuring how long the tool's registry.dispatch()
call took (time.monotonic() before/after). Inspired by Claude Code
2.1.119 which added the same field to PostToolUse hook inputs.
Wire points:
- model_tools.py: measure dispatch latency, pass duration_ms to
invoke_hook("post_tool_call", ...) and invoke_hook("transform_tool_result", ...)
- hermes_cli/hooks.py: include duration_ms in the synthetic payload
used by 'hermes hooks test' and 'hermes hooks doctor' so shell-hook
authors see the same shape at development time as runtime
- shell hooks (agent/shell_hooks.py): no code change needed;
_serialize_payload already surfaces non-top-level kwargs under
payload['extra'], so duration_ms lands at extra.duration_ms for
shell-hook scripts
Plugin authors can now build latency dashboards, per-tool SLO alerts,
and regression canaries without having to wrap every tool manually.
Test: tests/test_model_tools.py::test_post_tool_call_receives_non_negative_integer_duration_ms
E2E: real PluginManager + dispatch monkey-patched with a 50ms sleep,
hook callback observes duration_ms >= 50 (int), since monotonic timing around the sleep can only meet or exceed it.
Refs: https://code.claude.com/docs/en/changelog (2.1.119, Apr 23 2026)
This commit is contained in:
parent
13038dc747
commit
0f82c757e0
5 changed files with 52 additions and 6 deletions
|
|
@ -125,6 +125,7 @@ _DEFAULT_PAYLOADS = {
|
||||||
"task_id": "test-task",
|
"task_id": "test-task",
|
||||||
"tool_call_id": "test-call",
|
"tool_call_id": "test-call",
|
||||||
"result": '{"output": "hello"}',
|
"result": '{"output": "hello"}',
|
||||||
|
"duration_ms": 42,
|
||||||
},
|
},
|
||||||
"pre_llm_call": {
|
"pre_llm_call": {
|
||||||
"session_id": "test-session",
|
"session_id": "test-session",
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ import json
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
|
import time
|
||||||
from typing import Dict, Any, List, Optional, Tuple
|
from typing import Dict, Any, List, Optional, Tuple
|
||||||
|
|
||||||
from tools.registry import discover_builtin_tools, registry
|
from tools.registry import discover_builtin_tools, registry
|
||||||
|
|
@ -563,6 +564,14 @@ def handle_function_call(
|
||||||
except Exception:
|
except Exception:
|
||||||
pass # file_tools may not be loaded yet
|
pass # file_tools may not be loaded yet
|
||||||
|
|
||||||
|
# Measure tool dispatch latency so post_tool_call and
|
||||||
|
# transform_tool_result hooks can observe per-tool duration.
|
||||||
|
# Inspired by Claude Code 2.1.119, which added ``duration_ms`` to
|
||||||
|
# PostToolUse hook inputs so plugin authors can build latency
|
||||||
|
# dashboards, budget alerts, and regression canaries without having
|
||||||
|
# to wrap every tool manually. We use monotonic() so the value is
|
||||||
|
# unaffected by wall-clock adjustments during the call.
|
||||||
|
_dispatch_start = time.monotonic()
|
||||||
if function_name == "execute_code":
|
if function_name == "execute_code":
|
||||||
# Prefer the caller-provided list so subagents can't overwrite
|
# Prefer the caller-provided list so subagents can't overwrite
|
||||||
# the parent's tool set via the process-global.
|
# the parent's tool set via the process-global.
|
||||||
|
|
@ -578,6 +587,7 @@ def handle_function_call(
|
||||||
task_id=task_id,
|
task_id=task_id,
|
||||||
user_task=user_task,
|
user_task=user_task,
|
||||||
)
|
)
|
||||||
|
duration_ms = int((time.monotonic() - _dispatch_start) * 1000)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from hermes_cli.plugins import invoke_hook
|
from hermes_cli.plugins import invoke_hook
|
||||||
|
|
@ -589,6 +599,7 @@ def handle_function_call(
|
||||||
task_id=task_id or "",
|
task_id=task_id or "",
|
||||||
session_id=session_id or "",
|
session_id=session_id or "",
|
||||||
tool_call_id=tool_call_id or "",
|
tool_call_id=tool_call_id or "",
|
||||||
|
duration_ms=duration_ms,
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
@ -609,6 +620,7 @@ def handle_function_call(
|
||||||
task_id=task_id or "",
|
task_id=task_id or "",
|
||||||
session_id=session_id or "",
|
session_id=session_id or "",
|
||||||
tool_call_id=tool_call_id or "",
|
tool_call_id=tool_call_id or "",
|
||||||
|
duration_ms=duration_ms,
|
||||||
)
|
)
|
||||||
for hook_result in hook_results:
|
for hook_result in hook_results:
|
||||||
if isinstance(hook_result, str):
|
if isinstance(hook_result, str):
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
"""Tests for model_tools.py — function call dispatch, agent-loop interception, legacy toolsets."""
|
"""Tests for model_tools.py — function call dispatch, agent-loop interception, legacy toolsets."""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from unittest.mock import call, patch
|
from unittest.mock import ANY, call, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
@ -71,6 +71,7 @@ class TestHandleFunctionCall:
|
||||||
task_id="task-1",
|
task_id="task-1",
|
||||||
session_id="session-1",
|
session_id="session-1",
|
||||||
tool_call_id="call-1",
|
tool_call_id="call-1",
|
||||||
|
duration_ms=ANY,
|
||||||
),
|
),
|
||||||
call(
|
call(
|
||||||
"transform_tool_result",
|
"transform_tool_result",
|
||||||
|
|
@ -80,9 +81,37 @@ class TestHandleFunctionCall:
|
||||||
task_id="task-1",
|
task_id="task-1",
|
||||||
session_id="session-1",
|
session_id="session-1",
|
||||||
tool_call_id="call-1",
|
tool_call_id="call-1",
|
||||||
|
duration_ms=ANY,
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def test_post_tool_call_receives_non_negative_integer_duration_ms(self):
|
||||||
|
"""Regression: post_tool_call and transform_tool_result hooks must
|
||||||
|
receive a non-negative integer ``duration_ms`` kwarg measuring
|
||||||
|
dispatch latency. Inspired by Claude Code 2.1.119, which added
|
||||||
|
``duration_ms`` to its PostToolUse hook inputs.
|
||||||
|
"""
|
||||||
|
with (
|
||||||
|
patch("model_tools.registry.dispatch", return_value='{"ok":true}'),
|
||||||
|
patch("hermes_cli.plugins.invoke_hook") as mock_invoke_hook,
|
||||||
|
):
|
||||||
|
handle_function_call("web_search", {"q": "test"}, task_id="t1")
|
||||||
|
|
||||||
|
kwargs_by_hook = {
|
||||||
|
c.args[0]: c.kwargs for c in mock_invoke_hook.call_args_list
|
||||||
|
}
|
||||||
|
assert "duration_ms" in kwargs_by_hook["post_tool_call"]
|
||||||
|
assert "duration_ms" in kwargs_by_hook["transform_tool_result"]
|
||||||
|
|
||||||
|
post_duration = kwargs_by_hook["post_tool_call"]["duration_ms"]
|
||||||
|
transform_duration = kwargs_by_hook["transform_tool_result"]["duration_ms"]
|
||||||
|
assert isinstance(post_duration, int)
|
||||||
|
assert post_duration >= 0
|
||||||
|
# Both hooks should observe the same measured duration.
|
||||||
|
assert post_duration == transform_duration
|
||||||
|
# pre_tool_call does NOT get duration_ms (nothing has run yet).
|
||||||
|
assert "duration_ms" not in kwargs_by_hook["pre_tool_call"]
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Agent loop tools
|
# Agent loop tools
|
||||||
|
|
|
||||||
|
|
@ -414,7 +414,7 @@ Each hook is documented in full on the **[Event Hooks reference](/docs/user-guid
|
||||||
| Hook | Fires when | Callback signature | Returns |
|
| Hook | Fires when | Callback signature | Returns |
|
||||||
|------|-----------|-------------------|---------|
|
|------|-----------|-------------------|---------|
|
||||||
| [`pre_tool_call`](/docs/user-guide/features/hooks#pre_tool_call) | Before any tool executes | `tool_name: str, args: dict, task_id: str` | ignored |
|
| [`pre_tool_call`](/docs/user-guide/features/hooks#pre_tool_call) | Before any tool executes | `tool_name: str, args: dict, task_id: str` | ignored |
|
||||||
| [`post_tool_call`](/docs/user-guide/features/hooks#post_tool_call) | After any tool returns | `tool_name: str, args: dict, result: str, task_id: str` | ignored |
|
| [`post_tool_call`](/docs/user-guide/features/hooks#post_tool_call) | After any tool returns | `tool_name: str, args: dict, result: str, task_id: str, duration_ms: int` | ignored |
|
||||||
| [`pre_llm_call`](/docs/user-guide/features/hooks#pre_llm_call) | Once per turn, before the tool-calling loop | `session_id: str, user_message: str, conversation_history: list, is_first_turn: bool, model: str, platform: str` | [context injection](#pre_llm_call-context-injection) |
|
| [`pre_llm_call`](/docs/user-guide/features/hooks#pre_llm_call) | Once per turn, before the tool-calling loop | `session_id: str, user_message: str, conversation_history: list, is_first_turn: bool, model: str, platform: str` | [context injection](#pre_llm_call-context-injection) |
|
||||||
| [`post_llm_call`](/docs/user-guide/features/hooks#post_llm_call) | Once per turn, after the tool-calling loop (successful turns only) | `session_id: str, user_message: str, assistant_response: str, conversation_history: list, model: str, platform: str` | ignored |
|
| [`post_llm_call`](/docs/user-guide/features/hooks#post_llm_call) | Once per turn, after the tool-calling loop (successful turns only) | `session_id: str, user_message: str, assistant_response: str, conversation_history: list, model: str, platform: str` | ignored |
|
||||||
| [`on_session_start`](/docs/user-guide/features/hooks#on_session_start) | New session created (first turn only) | `session_id: str, model: str, platform: str` | ignored |
|
| [`on_session_start`](/docs/user-guide/features/hooks#on_session_start) | New session created (first turn only) | `session_id: str, model: str, platform: str` | ignored |
|
||||||
|
|
|
||||||
|
|
@ -317,7 +317,8 @@ Fires **immediately after** every tool execution returns.
|
||||||
**Callback signature:**
|
**Callback signature:**
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def my_callback(tool_name: str, args: dict, result: str, task_id: str, **kwargs):
|
def my_callback(tool_name: str, args: dict, result: str, task_id: str,
|
||||||
|
duration_ms: int, **kwargs):
|
||||||
```
|
```
|
||||||
|
|
||||||
| Parameter | Type | Description |
|
| Parameter | Type | Description |
|
||||||
|
|
@ -326,24 +327,27 @@ def my_callback(tool_name: str, args: dict, result: str, task_id: str, **kwargs)
|
||||||
| `args` | `dict` | The arguments the model passed to the tool |
|
| `args` | `dict` | The arguments the model passed to the tool |
|
||||||
| `result` | `str` | The tool's return value (always a JSON string) |
|
| `result` | `str` | The tool's return value (always a JSON string) |
|
||||||
| `task_id` | `str` | Session/task identifier. Empty string if not set. |
|
| `task_id` | `str` | Session/task identifier. Empty string if not set. |
|
||||||
|
| `duration_ms` | `int` | How long the tool's dispatch took, in milliseconds (measured with `time.monotonic()` around `registry.dispatch()`). |
|
||||||
|
|
||||||
**Fires:** In `model_tools.py`, inside `handle_function_call()`, after the tool's handler returns. Fires once per tool call. Does **not** fire if the tool raised an unhandled exception (the error is caught and returned as an error JSON string instead, and `post_tool_call` fires with that error string as `result`).
|
**Fires:** In `model_tools.py`, inside `handle_function_call()`, after the tool's handler returns. Fires once per tool call. Does **not** fire if the tool raised an unhandled exception (the error is caught and returned as an error JSON string instead, and `post_tool_call` fires with that error string as `result`).
|
||||||
|
|
||||||
**Return value:** Ignored.
|
**Return value:** Ignored.
|
||||||
|
|
||||||
**Use cases:** Logging tool results, metrics collection, tracking tool success/failure rates, sending notifications when specific tools complete.
|
**Use cases:** Logging tool results, metrics collection, tracking tool success/failure rates, latency dashboards, per-tool budget alerts, sending notifications when specific tools complete.
|
||||||
|
|
||||||
**Example — track tool usage metrics:**
|
**Example — track tool usage metrics:**
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from collections import Counter
|
from collections import Counter, defaultdict
|
||||||
import json
|
import json
|
||||||
|
|
||||||
_tool_counts = Counter()
|
_tool_counts = Counter()
|
||||||
_error_counts = Counter()
|
_error_counts = Counter()
|
||||||
|
_latency_ms = defaultdict(list)
|
||||||
|
|
||||||
def track_metrics(tool_name, result, **kwargs):
|
def track_metrics(tool_name, result, duration_ms=0, **kwargs):
|
||||||
_tool_counts[tool_name] += 1
|
_tool_counts[tool_name] += 1
|
||||||
|
_latency_ms[tool_name].append(duration_ms)
|
||||||
try:
|
try:
|
||||||
parsed = json.loads(result)
|
parsed = json.loads(result)
|
||||||
if "error" in parsed:
|
if "error" in parsed:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue