diff --git a/hermes_cli/hooks.py b/hermes_cli/hooks.py index 97d9e36b3..c39a692e6 100644 --- a/hermes_cli/hooks.py +++ b/hermes_cli/hooks.py @@ -125,6 +125,7 @@ _DEFAULT_PAYLOADS = { "task_id": "test-task", "tool_call_id": "test-call", "result": '{"output": "hello"}', + "duration_ms": 42, }, "pre_llm_call": { "session_id": "test-session", diff --git a/model_tools.py b/model_tools.py index 36cea8f30..242701628 100644 --- a/model_tools.py +++ b/model_tools.py @@ -24,6 +24,7 @@ import json import asyncio import logging import threading +import time from typing import Dict, Any, List, Optional, Tuple from tools.registry import discover_builtin_tools, registry @@ -563,6 +564,14 @@ def handle_function_call( except Exception: pass # file_tools may not be loaded yet + # Measure tool dispatch latency so post_tool_call and + # transform_tool_result hooks can observe per-tool duration. + # Inspired by Claude Code 2.1.119, which added ``duration_ms`` to + # PostToolUse hook inputs so plugin authors can build latency + # dashboards, budget alerts, and regression canaries without having + # to wrap every tool manually. We use monotonic() so the value is + # unaffected by wall-clock adjustments during the call. + _dispatch_start = time.monotonic() if function_name == "execute_code": # Prefer the caller-provided list so subagents can't overwrite # the parent's tool set via the process-global. @@ -578,6 +587,7 @@ def handle_function_call( task_id=task_id, user_task=user_task, ) + duration_ms = int((time.monotonic() - _dispatch_start) * 1000) try: from hermes_cli.plugins import invoke_hook @@ -589,6 +599,7 @@ def handle_function_call( task_id=task_id or "", session_id=session_id or "", tool_call_id=tool_call_id or "", + duration_ms=duration_ms, ) except Exception: pass @@ -609,6 +620,7 @@ def handle_function_call( task_id=task_id or "", session_id=session_id or "", tool_call_id=tool_call_id or "", + duration_ms=duration_ms, ) for hook_result in hook_results: if isinstance(hook_result, str): diff --git a/tests/test_model_tools.py b/tests/test_model_tools.py index 9c2764daf..c8fd3581a 100644 --- a/tests/test_model_tools.py +++ b/tests/test_model_tools.py @@ -1,7 +1,7 @@ """Tests for model_tools.py — function call dispatch, agent-loop interception, legacy toolsets.""" import json -from unittest.mock import call, patch +from unittest.mock import ANY, call, patch import pytest @@ -71,6 +71,7 @@ class TestHandleFunctionCall: task_id="task-1", session_id="session-1", tool_call_id="call-1", + duration_ms=ANY, ), call( "transform_tool_result", @@ -80,9 +81,37 @@ class TestHandleFunctionCall: task_id="task-1", session_id="session-1", tool_call_id="call-1", + duration_ms=ANY, ), ] + def test_post_tool_call_receives_non_negative_integer_duration_ms(self): + """Regression: post_tool_call and transform_tool_result hooks must + receive a non-negative integer ``duration_ms`` kwarg measuring + dispatch latency. Inspired by Claude Code 2.1.119, which added + ``duration_ms`` to its PostToolUse hook inputs. 
+ """ + with ( + patch("model_tools.registry.dispatch", return_value='{"ok":true}'), + patch("hermes_cli.plugins.invoke_hook") as mock_invoke_hook, + ): + handle_function_call("web_search", {"q": "test"}, task_id="t1") + + kwargs_by_hook = { + c.args[0]: c.kwargs for c in mock_invoke_hook.call_args_list + } + assert "duration_ms" in kwargs_by_hook["post_tool_call"] + assert "duration_ms" in kwargs_by_hook["transform_tool_result"] + + post_duration = kwargs_by_hook["post_tool_call"]["duration_ms"] + transform_duration = kwargs_by_hook["transform_tool_result"]["duration_ms"] + assert isinstance(post_duration, int) + assert post_duration >= 0 + # Both hooks should observe the same measured duration. + assert post_duration == transform_duration + # pre_tool_call does NOT get duration_ms (nothing has run yet). + assert "duration_ms" not in kwargs_by_hook["pre_tool_call"] + # ========================================================================= # Agent loop tools diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md index 4e2ee5cf2..6a220aba2 100644 --- a/website/docs/guides/build-a-hermes-plugin.md +++ b/website/docs/guides/build-a-hermes-plugin.md @@ -414,7 +414,7 @@ Each hook is documented in full on the **[Event Hooks reference](/docs/user-guid | Hook | Fires when | Callback signature | Returns | |------|-----------|-------------------|---------| | [`pre_tool_call`](/docs/user-guide/features/hooks#pre_tool_call) | Before any tool executes | `tool_name: str, args: dict, task_id: str` | ignored | -| [`post_tool_call`](/docs/user-guide/features/hooks#post_tool_call) | After any tool returns | `tool_name: str, args: dict, result: str, task_id: str` | ignored | +| [`post_tool_call`](/docs/user-guide/features/hooks#post_tool_call) | After any tool returns | `tool_name: str, args: dict, result: str, task_id: str, duration_ms: int` | ignored | | [`pre_llm_call`](/docs/user-guide/features/hooks#pre_llm_call) | Once per turn, before the tool-calling loop | `session_id: str, user_message: str, conversation_history: list, is_first_turn: bool, model: str, platform: str` | [context injection](#pre_llm_call-context-injection) | | [`post_llm_call`](/docs/user-guide/features/hooks#post_llm_call) | Once per turn, after the tool-calling loop (successful turns only) | `session_id: str, user_message: str, assistant_response: str, conversation_history: list, model: str, platform: str` | ignored | | [`on_session_start`](/docs/user-guide/features/hooks#on_session_start) | New session created (first turn only) | `session_id: str, model: str, platform: str` | ignored | diff --git a/website/docs/user-guide/features/hooks.md b/website/docs/user-guide/features/hooks.md index e7ce5063a..ba77e535f 100644 --- a/website/docs/user-guide/features/hooks.md +++ b/website/docs/user-guide/features/hooks.md @@ -317,7 +317,8 @@ Fires **immediately after** every tool execution returns. **Callback signature:** ```python -def my_callback(tool_name: str, args: dict, result: str, task_id: str, **kwargs): +def my_callback(tool_name: str, args: dict, result: str, task_id: str, + duration_ms: int, **kwargs): ``` | Parameter | Type | Description | @@ -326,24 +327,27 @@ def my_callback(tool_name: str, args: dict, result: str, task_id: str, **kwargs) | `args` | `dict` | The arguments the model passed to the tool | | `result` | `str` | The tool's return value (always a JSON string) | | `task_id` | `str` | Session/task identifier. Empty string if not set. 
|
+| `duration_ms` | `int` | How long the tool's dispatch took, in milliseconds (measured with `time.monotonic()` around `registry.dispatch()`). |
 
-**Fires:** In `model_tools.py`, inside `handle_function_call()`, after the tool's handler returns. Fires once per tool call. Does **not** fire if the tool raised an unhandled exception (the error is caught and returned as an error JSON string instead, and `post_tool_call` fires with that error string as `result`).
+**Fires:** In `model_tools.py`, inside `handle_function_call()`, after the tool's handler returns. Fires once per tool call, even when the tool errors: the exception is caught and returned as an error JSON string, and `post_tool_call` fires with that error string as `result`.
 
 **Return value:** Ignored.
 
-**Use cases:** Logging tool results, metrics collection, tracking tool success/failure rates, sending notifications when specific tools complete.
+**Use cases:** Logging tool results, metrics collection, tracking tool success/failure rates, latency dashboards, per-tool budget alerts, sending notifications when specific tools complete.
 
 **Example — track tool usage metrics:**
 
 ```python
-from collections import Counter
+from collections import Counter, defaultdict
 import json
 
 _tool_counts = Counter()
 _error_counts = Counter()
+_latency_ms = defaultdict(list)
 
-def track_metrics(tool_name, result, **kwargs):
+def track_metrics(tool_name, result, duration_ms=0, **kwargs):
     _tool_counts[tool_name] += 1
+    _latency_ms[tool_name].append(duration_ms)
     try:
         parsed = json.loads(result)
         if "error" in parsed:
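
Beyond the metrics example patched above, the new `duration_ms` kwarg is enough to build the per-tool budget alerts that the `model_tools.py` comment mentions. Below is a minimal sketch of such a `post_tool_call` callback; the signature matches the hooks.md table in this diff, but the budget table, thresholds, and logger name are illustrative assumptions, and registering the callback follows the plugin guide rather than anything shown here.

```python
# Sketch of a post_tool_call hook that turns duration_ms into budget
# alerts. LATENCY_BUDGET_MS, DEFAULT_BUDGET_MS, and the logger name are
# hypothetical; only the callback signature comes from the docs above.
import logging

logger = logging.getLogger("latency_plugin")

LATENCY_BUDGET_MS = {"web_search": 5_000, "execute_code": 30_000}
DEFAULT_BUDGET_MS = 10_000

def alert_on_slow_tools(tool_name, args, result, task_id, duration_ms=0, **kwargs):
    """Warn when a tool call exceeds its latency budget."""
    budget = LATENCY_BUDGET_MS.get(tool_name, DEFAULT_BUDGET_MS)
    if duration_ms > budget:
        logger.warning(
            "%s took %dms (budget %dms, task %s)",
            tool_name, duration_ms, budget, task_id or "<none>",
        )
```

Defaulting `duration_ms=0` keeps the callback compatible with Hermes versions that invoke hooks without the new kwarg, mirroring the `duration_ms=0` default in the docs example.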
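To turn the `_latency_ms` samples collected by the docs example into the "latency dashboards" numbers the use-case list promises, a small summarizer suffices. A sketch, assuming the `_latency_ms` mapping from that example; the `latency_summary` helper and its output shape are ours:

```python
import statistics

def latency_summary(latency_ms):
    """Reduce per-tool duration samples (ms) to count / mean / p95."""
    summary = {}
    for tool, samples in latency_ms.items():
        if not samples:
            continue
        # statistics.quantiles() needs at least two data points; fall
        # back to the lone sample otherwise. n=20 yields 19 cut points,
        # the last of which is the 95th percentile.
        p95 = statistics.quantiles(samples, n=20)[-1] if len(samples) >= 2 else samples[0]
        summary[tool] = {
            "count": len(samples),
            "mean_ms": round(statistics.fmean(samples), 1),
            "p95_ms": round(p95, 1),
        }
    return summary
```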