feat(hooks): add duration_ms to post_tool_call + transform_tool_result

Plugin hooks fired after a tool dispatch now receive an integer
duration_ms kwarg measuring how long the tool's registry.dispatch()
call took (time.monotonic() before/after). Inspired by Claude Code
2.1.119 which added the same field to PostToolUse hook inputs.

Wire points:
- model_tools.py: measure dispatch latency, pass duration_ms to
  invoke_hook("post_tool_call", ...) and invoke_hook("transform_tool_result", ...)
- hermes_cli/hooks.py: include duration_ms in the synthetic payload
  used by 'hermes hooks test' and 'hermes hooks doctor' so shell-hook
  authors see the same shape at development time as runtime
- shell hooks (agent/shell_hooks.py): no code change needed;
  _serialize_payload already surfaces non-top-level kwargs under
  payload['extra'], so duration_ms lands at extra.duration_ms for
  shell-hook scripts

Plugin authors can now build latency dashboards, per-tool SLO alerts,
and regression canaries without having to wrap every tool manually.

Test: tests/test_model_tools.py::test_post_tool_call_receives_non_negative_integer_duration_ms
E2E: real PluginManager + dispatch monkey-patched with a 50ms sleep,
hook callback observes duration_ms=50 (int).

Refs: https://code.claude.com/docs/en/changelog (2.1.119, Apr 23 2026)
This commit is contained in:
Teknium 2026-04-24 17:08:06 -07:00
parent 13038dc747
commit 0f82c757e0
No known key found for this signature in database
5 changed files with 52 additions and 6 deletions

View file

@@ -125,6 +125,7 @@ _DEFAULT_PAYLOADS = {
"task_id": "test-task", "task_id": "test-task",
"tool_call_id": "test-call", "tool_call_id": "test-call",
"result": '{"output": "hello"}', "result": '{"output": "hello"}',
"duration_ms": 42,
}, },
"pre_llm_call": { "pre_llm_call": {
"session_id": "test-session", "session_id": "test-session",

View file

@@ -24,6 +24,7 @@ import json
import asyncio import asyncio
import logging import logging
import threading import threading
import time
from typing import Dict, Any, List, Optional, Tuple from typing import Dict, Any, List, Optional, Tuple
from tools.registry import discover_builtin_tools, registry from tools.registry import discover_builtin_tools, registry
@@ -563,6 +564,14 @@ def handle_function_call(
except Exception: except Exception:
pass # file_tools may not be loaded yet pass # file_tools may not be loaded yet
# Measure tool dispatch latency so post_tool_call and
# transform_tool_result hooks can observe per-tool duration.
# Inspired by Claude Code 2.1.119, which added ``duration_ms`` to
# PostToolUse hook inputs so plugin authors can build latency
# dashboards, budget alerts, and regression canaries without having
# to wrap every tool manually. We use monotonic() so the value is
# unaffected by wall-clock adjustments during the call.
_dispatch_start = time.monotonic()
if function_name == "execute_code": if function_name == "execute_code":
# Prefer the caller-provided list so subagents can't overwrite # Prefer the caller-provided list so subagents can't overwrite
# the parent's tool set via the process-global. # the parent's tool set via the process-global.
@@ -578,6 +587,7 @@
task_id=task_id, task_id=task_id,
user_task=user_task, user_task=user_task,
) )
duration_ms = int((time.monotonic() - _dispatch_start) * 1000)
try: try:
from hermes_cli.plugins import invoke_hook from hermes_cli.plugins import invoke_hook
@@ -589,6 +599,7 @@
task_id=task_id or "", task_id=task_id or "",
session_id=session_id or "", session_id=session_id or "",
tool_call_id=tool_call_id or "", tool_call_id=tool_call_id or "",
duration_ms=duration_ms,
) )
except Exception: except Exception:
pass pass
@@ -609,6 +620,7 @@
task_id=task_id or "", task_id=task_id or "",
session_id=session_id or "", session_id=session_id or "",
tool_call_id=tool_call_id or "", tool_call_id=tool_call_id or "",
duration_ms=duration_ms,
) )
for hook_result in hook_results: for hook_result in hook_results:
if isinstance(hook_result, str): if isinstance(hook_result, str):

View file

@@ -1,7 +1,7 @@
"""Tests for model_tools.py — function call dispatch, agent-loop interception, legacy toolsets.""" """Tests for model_tools.py — function call dispatch, agent-loop interception, legacy toolsets."""
import json import json
from unittest.mock import call, patch from unittest.mock import ANY, call, patch
import pytest import pytest
@@ -71,6 +71,7 @@ class TestHandleFunctionCall:
task_id="task-1", task_id="task-1",
session_id="session-1", session_id="session-1",
tool_call_id="call-1", tool_call_id="call-1",
duration_ms=ANY,
), ),
call( call(
"transform_tool_result", "transform_tool_result",
@@ -80,9 +81,37 @@
task_id="task-1", task_id="task-1",
session_id="session-1", session_id="session-1",
tool_call_id="call-1", tool_call_id="call-1",
duration_ms=ANY,
), ),
] ]
def test_post_tool_call_receives_non_negative_integer_duration_ms(self):
    """Regression: post_tool_call and transform_tool_result hooks must
    receive a non-negative integer ``duration_ms`` kwarg measuring
    dispatch latency. Inspired by Claude Code 2.1.119, which added
    ``duration_ms`` to its PostToolUse hook inputs.
    """
    dispatch_patch = patch("model_tools.registry.dispatch", return_value='{"ok":true}')
    hooks_patch = patch("hermes_cli.plugins.invoke_hook")
    with dispatch_patch, hooks_patch as invoke_hook_mock:
        handle_function_call("web_search", {"q": "test"}, task_id="t1")
        # Index each recorded hook invocation by its hook name (first
        # positional arg) so we can inspect the kwargs per hook.
        hook_kwargs = {}
        for invocation in invoke_hook_mock.call_args_list:
            hook_kwargs[invocation.args[0]] = invocation.kwargs
        for hook_name in ("post_tool_call", "transform_tool_result"):
            assert "duration_ms" in hook_kwargs[hook_name]
        measured = hook_kwargs["post_tool_call"]["duration_ms"]
        assert isinstance(measured, int)
        assert measured >= 0
        # Both hooks should observe the same measured duration.
        assert measured == hook_kwargs["transform_tool_result"]["duration_ms"]
        # pre_tool_call does NOT get duration_ms (nothing has run yet).
        assert "duration_ms" not in hook_kwargs["pre_tool_call"]
# ========================================================================= # =========================================================================
# Agent loop tools # Agent loop tools

View file

@@ -414,7 +414,7 @@ Each hook is documented in full on the **[Event Hooks reference](/docs/user-guide
| Hook | Fires when | Callback signature | Returns | | Hook | Fires when | Callback signature | Returns |
|------|-----------|-------------------|---------| |------|-----------|-------------------|---------|
| [`pre_tool_call`](/docs/user-guide/features/hooks#pre_tool_call) | Before any tool executes | `tool_name: str, args: dict, task_id: str` | ignored | | [`pre_tool_call`](/docs/user-guide/features/hooks#pre_tool_call) | Before any tool executes | `tool_name: str, args: dict, task_id: str` | ignored |
| [`post_tool_call`](/docs/user-guide/features/hooks#post_tool_call) | After any tool returns | `tool_name: str, args: dict, result: str, task_id: str` | ignored | | [`post_tool_call`](/docs/user-guide/features/hooks#post_tool_call) | After any tool returns | `tool_name: str, args: dict, result: str, task_id: str, duration_ms: int` | ignored |
| [`pre_llm_call`](/docs/user-guide/features/hooks#pre_llm_call) | Once per turn, before the tool-calling loop | `session_id: str, user_message: str, conversation_history: list, is_first_turn: bool, model: str, platform: str` | [context injection](#pre_llm_call-context-injection) | | [`pre_llm_call`](/docs/user-guide/features/hooks#pre_llm_call) | Once per turn, before the tool-calling loop | `session_id: str, user_message: str, conversation_history: list, is_first_turn: bool, model: str, platform: str` | [context injection](#pre_llm_call-context-injection) |
| [`post_llm_call`](/docs/user-guide/features/hooks#post_llm_call) | Once per turn, after the tool-calling loop (successful turns only) | `session_id: str, user_message: str, assistant_response: str, conversation_history: list, model: str, platform: str` | ignored | | [`post_llm_call`](/docs/user-guide/features/hooks#post_llm_call) | Once per turn, after the tool-calling loop (successful turns only) | `session_id: str, user_message: str, assistant_response: str, conversation_history: list, model: str, platform: str` | ignored |
| [`on_session_start`](/docs/user-guide/features/hooks#on_session_start) | New session created (first turn only) | `session_id: str, model: str, platform: str` | ignored | | [`on_session_start`](/docs/user-guide/features/hooks#on_session_start) | New session created (first turn only) | `session_id: str, model: str, platform: str` | ignored |

View file

@@ -317,7 +317,8 @@ Fires **immediately after** every tool execution returns.
**Callback signature:** **Callback signature:**
```python ```python
def my_callback(tool_name: str, args: dict, result: str, task_id: str, **kwargs): def my_callback(tool_name: str, args: dict, result: str, task_id: str,
duration_ms: int, **kwargs):
``` ```
| Parameter | Type | Description | | Parameter | Type | Description |
@@ -326,24 +327,27 @@ def my_callback(tool_name: str, args: dict, result: str, task_id: str, **kwargs)
| `args` | `dict` | The arguments the model passed to the tool | | `args` | `dict` | The arguments the model passed to the tool |
| `result` | `str` | The tool's return value (always a JSON string) | | `result` | `str` | The tool's return value (always a JSON string) |
| `task_id` | `str` | Session/task identifier. Empty string if not set. | | `task_id` | `str` | Session/task identifier. Empty string if not set. |
| `duration_ms` | `int` | How long the tool's dispatch took, in milliseconds (measured with `time.monotonic()` around `registry.dispatch()`). |
**Fires:** In `model_tools.py`, inside `handle_function_call()`, after the tool's handler returns. Fires once per tool call. Does **not** fire if the tool raised an unhandled exception (the error is caught and returned as an error JSON string instead, and `post_tool_call` fires with that error string as `result`). **Fires:** In `model_tools.py`, inside `handle_function_call()`, after the tool's handler returns. Fires once per tool call. Does **not** fire if the tool raised an unhandled exception (the error is caught and returned as an error JSON string instead, and `post_tool_call` fires with that error string as `result`).
**Return value:** Ignored. **Return value:** Ignored.
**Use cases:** Logging tool results, metrics collection, tracking tool success/failure rates, sending notifications when specific tools complete. **Use cases:** Logging tool results, metrics collection, tracking tool success/failure rates, latency dashboards, per-tool budget alerts, sending notifications when specific tools complete.
**Example — track tool usage metrics:** **Example — track tool usage metrics:**
```python ```python
from collections import Counter from collections import Counter, defaultdict
import json import json
_tool_counts = Counter() _tool_counts = Counter()
_error_counts = Counter() _error_counts = Counter()
_latency_ms = defaultdict(list)
def track_metrics(tool_name, result, **kwargs): def track_metrics(tool_name, result, duration_ms=0, **kwargs):
_tool_counts[tool_name] += 1 _tool_counts[tool_name] += 1
_latency_ms[tool_name].append(duration_ms)
try: try:
parsed = json.loads(result) parsed = json.loads(result)
if "error" in parsed: if "error" in parsed: