diff --git a/hermes_cli/hooks.py b/hermes_cli/hooks.py index 97d9e36b3..c39a692e6 100644 --- a/hermes_cli/hooks.py +++ b/hermes_cli/hooks.py @@ -125,6 +125,7 @@ _DEFAULT_PAYLOADS = { "task_id": "test-task", "tool_call_id": "test-call", "result": '{"output": "hello"}', + "duration_ms": 42, }, "pre_llm_call": { "session_id": "test-session", diff --git a/model_tools.py b/model_tools.py index 36cea8f30..242701628 100644 --- a/model_tools.py +++ b/model_tools.py @@ -24,6 +24,7 @@ import json import asyncio import logging import threading +import time from typing import Dict, Any, List, Optional, Tuple from tools.registry import discover_builtin_tools, registry @@ -563,6 +564,14 @@ def handle_function_call( except Exception: pass # file_tools may not be loaded yet + # Measure tool dispatch latency so post_tool_call and + # transform_tool_result hooks can observe per-tool duration. + # Inspired by Claude Code 2.1.119, which added ``duration_ms`` to + # PostToolUse hook inputs so plugin authors can build latency + # dashboards, budget alerts, and regression canaries without having + # to wrap every tool manually. We use monotonic() so the value is + # unaffected by wall-clock adjustments during the call. + _dispatch_start = time.monotonic() if function_name == "execute_code": # Prefer the caller-provided list so subagents can't overwrite # the parent's tool set via the process-global. @@ -578,6 +587,7 @@ def handle_function_call( task_id=task_id, user_task=user_task, ) + duration_ms = int((time.monotonic() - _dispatch_start) * 1000) try: from hermes_cli.plugins import invoke_hook @@ -589,6 +599,7 @@ def handle_function_call( task_id=task_id or "", session_id=session_id or "", tool_call_id=tool_call_id or "", + duration_ms=duration_ms, ) except Exception: pass @@ -609,6 +620,7 @@ def handle_function_call( task_id=task_id or "", session_id=session_id or "", tool_call_id=tool_call_id or "", + duration_ms=duration_ms, ) for hook_result in hook_results: if isinstance(hook_result, str): diff --git a/tests/test_model_tools.py b/tests/test_model_tools.py index 9c2764daf..c8fd3581a 100644 --- a/tests/test_model_tools.py +++ b/tests/test_model_tools.py @@ -1,7 +1,7 @@ """Tests for model_tools.py — function call dispatch, agent-loop interception, legacy toolsets.""" import json -from unittest.mock import call, patch +from unittest.mock import ANY, call, patch import pytest @@ -71,6 +71,7 @@ class TestHandleFunctionCall: task_id="task-1", session_id="session-1", tool_call_id="call-1", + duration_ms=ANY, ), call( "transform_tool_result", @@ -80,9 +81,37 @@ class TestHandleFunctionCall: task_id="task-1", session_id="session-1", tool_call_id="call-1", + duration_ms=ANY, ), ] + def test_post_tool_call_receives_non_negative_integer_duration_ms(self): + """Regression: post_tool_call and transform_tool_result hooks must + receive a non-negative integer ``duration_ms`` kwarg measuring + dispatch latency. Inspired by Claude Code 2.1.119, which added + ``duration_ms`` to its PostToolUse hook inputs. 
+ """ + with ( + patch("model_tools.registry.dispatch", return_value='{"ok":true}'), + patch("hermes_cli.plugins.invoke_hook") as mock_invoke_hook, + ): + handle_function_call("web_search", {"q": "test"}, task_id="t1") + + kwargs_by_hook = { + c.args[0]: c.kwargs for c in mock_invoke_hook.call_args_list + } + assert "duration_ms" in kwargs_by_hook["post_tool_call"] + assert "duration_ms" in kwargs_by_hook["transform_tool_result"] + + post_duration = kwargs_by_hook["post_tool_call"]["duration_ms"] + transform_duration = kwargs_by_hook["transform_tool_result"]["duration_ms"] + assert isinstance(post_duration, int) + assert post_duration >= 0 + # Both hooks should observe the same measured duration. + assert post_duration == transform_duration + # pre_tool_call does NOT get duration_ms (nothing has run yet). + assert "duration_ms" not in kwargs_by_hook["pre_tool_call"] + # ========================================================================= # Agent loop tools diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md index 4e2ee5cf2..6a220aba2 100644 --- a/website/docs/guides/build-a-hermes-plugin.md +++ b/website/docs/guides/build-a-hermes-plugin.md @@ -414,7 +414,7 @@ Each hook is documented in full on the **[Event Hooks reference](/docs/user-guid | Hook | Fires when | Callback signature | Returns | |------|-----------|-------------------|---------| | [`pre_tool_call`](/docs/user-guide/features/hooks#pre_tool_call) | Before any tool executes | `tool_name: str, args: dict, task_id: str` | ignored | -| [`post_tool_call`](/docs/user-guide/features/hooks#post_tool_call) | After any tool returns | `tool_name: str, args: dict, result: str, task_id: str` | ignored | +| [`post_tool_call`](/docs/user-guide/features/hooks#post_tool_call) | After any tool returns | `tool_name: str, args: dict, result: str, task_id: str, duration_ms: int` | ignored | | [`pre_llm_call`](/docs/user-guide/features/hooks#pre_llm_call) | Once per turn, before the tool-calling loop | `session_id: str, user_message: str, conversation_history: list, is_first_turn: bool, model: str, platform: str` | [context injection](#pre_llm_call-context-injection) | | [`post_llm_call`](/docs/user-guide/features/hooks#post_llm_call) | Once per turn, after the tool-calling loop (successful turns only) | `session_id: str, user_message: str, assistant_response: str, conversation_history: list, model: str, platform: str` | ignored | | [`on_session_start`](/docs/user-guide/features/hooks#on_session_start) | New session created (first turn only) | `session_id: str, model: str, platform: str` | ignored | diff --git a/website/docs/user-guide/features/hooks.md b/website/docs/user-guide/features/hooks.md index e7ce5063a..ba77e535f 100644 --- a/website/docs/user-guide/features/hooks.md +++ b/website/docs/user-guide/features/hooks.md @@ -317,7 +317,8 @@ Fires **immediately after** every tool execution returns. **Callback signature:** ```python -def my_callback(tool_name: str, args: dict, result: str, task_id: str, **kwargs): +def my_callback(tool_name: str, args: dict, result: str, task_id: str, + duration_ms: int, **kwargs): ``` | Parameter | Type | Description | @@ -326,24 +327,27 @@ def my_callback(tool_name: str, args: dict, result: str, task_id: str, **kwargs) | `args` | `dict` | The arguments the model passed to the tool | | `result` | `str` | The tool's return value (always a JSON string) | | `task_id` | `str` | Session/task identifier. Empty string if not set. 
|
+| `duration_ms` | `int` | How long the tool's dispatch took, in milliseconds (measured with `time.monotonic()` around `registry.dispatch()`). |
 
-**Fires:** In `model_tools.py`, inside `handle_function_call()`, after the tool's handler returns. Fires once per tool call. Does **not** fire if the tool raised an unhandled exception (the error is caught and returned as an error JSON string instead, and `post_tool_call` fires with that error string as `result`).
+**Fires:** In `model_tools.py`, inside `handle_function_call()`, after the tool's handler returns. Fires once per tool call, even when the tool errors: the exception is caught and returned as an error JSON string, and `post_tool_call` fires with that error string as `result`.
 
 **Return value:** Ignored.
 
-**Use cases:** Logging tool results, metrics collection, tracking tool success/failure rates, sending notifications when specific tools complete.
+**Use cases:** Logging tool results, metrics collection, tracking tool success/failure rates, latency dashboards, per-tool budget alerts, sending notifications when specific tools complete.
 
 **Example — track tool usage metrics:**
 
 ```python
-from collections import Counter
+from collections import Counter, defaultdict
 import json
 
 _tool_counts = Counter()
 _error_counts = Counter()
+_latency_ms = defaultdict(list)
 
-def track_metrics(tool_name, result, **kwargs):
+def track_metrics(tool_name, result, duration_ms=0, **kwargs):
     _tool_counts[tool_name] += 1
+    _latency_ms[tool_name].append(duration_ms)
     try:
         parsed = json.loads(result)
         if "error" in parsed:
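
Beyond the metrics example patched above, the new `duration_ms` kwarg is enough to build the per-tool budget alerts that the `model_tools.py` comment mentions. Below is a minimal sketch of such a `post_tool_call` callback; the signature matches the hooks.md table in this diff, but the budget table, thresholds, and logger name are illustrative assumptions, and registering the callback follows the plugin guide rather than anything shown here.

```python
# Sketch of a post_tool_call hook that turns duration_ms into budget
# alerts. LATENCY_BUDGET_MS, DEFAULT_BUDGET_MS, and the logger name are
# hypothetical; only the callback signature comes from the docs above.
import logging

logger = logging.getLogger("latency_plugin")

LATENCY_BUDGET_MS = {"web_search": 5_000, "execute_code": 30_000}
DEFAULT_BUDGET_MS = 10_000

def alert_on_slow_tools(tool_name, args, result, task_id, duration_ms=0, **kwargs):
    """Warn when a tool call exceeds its latency budget."""
    budget = LATENCY_BUDGET_MS.get(tool_name, DEFAULT_BUDGET_MS)
    if duration_ms > budget:
        logger.warning(
            "%s took %dms (budget %dms, task %s)",
            tool_name, duration_ms, budget, task_id or "<none>",
        )
```

Defaulting `duration_ms=0` keeps the callback compatible with Hermes versions that invoke hooks without the new kwarg, mirroring the `duration_ms=0` default in the docs example.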
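To turn the `_latency_ms` samples collected by the docs example into the "latency dashboards" numbers the use-case list promises, a small summarizer suffices. A sketch, assuming the `_latency_ms` mapping from that example; the `latency_summary` helper and its output shape are ours:

```python
import statistics

def latency_summary(latency_ms):
    """Reduce per-tool duration samples (ms) to count / mean / p95."""
    summary = {}
    for tool, samples in latency_ms.items():
        if not samples:
            continue
        # statistics.quantiles() needs at least two data points; fall
        # back to the lone sample otherwise. n=20 yields 19 cut points,
        # the last of which is the 95th percentile.
        p95 = statistics.quantiles(samples, n=20)[-1] if len(samples) >= 2 else samples[0]
        summary[tool] = {
            "count": len(samples),
            "mean_ms": round(statistics.fmean(samples), 1),
            "p95_ms": round(p95, 1),
        }
    return summary
```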