diff --git a/docs/langfuse-tracing-local-setup.md b/docs/langfuse-tracing-local-setup.md new file mode 100644 index 0000000000..6e1fbab484 --- /dev/null +++ b/docs/langfuse-tracing-local-setup.md @@ -0,0 +1,262 @@ +# Langfuse Tracing for Hermes + +Opt-in tracing plugin that sends LLM calls, tool calls, and per-turn spans to +Langfuse. The plugin lives **outside** the hermes-agent repo so pulling +upstream updates never causes conflicts. + +--- + +## Quick start (copy-paste recipe) + +This gets you from zero to working traces. Every command is meant to be run +in order in a single terminal session. + +```bash +# ── 1. Prerequisites ────────────────────────────────────────────────── +cd /path/to/hermes-agent +source .venv/bin/activate +pip install langfuse # into the repo venv, not global + +# ── 2. Fetch the plugin source ──────────────────────────────────────── +# The plugin lives on the fork branch feat/langfuse_tracing. +# Pick ONE of the two fetch commands depending on your remote setup: + +# (a) Your origin IS the fork (kshitijk4poor/hermes-agent): +git fetch origin feat/langfuse_tracing +PLUGIN_REF="origin/feat/langfuse_tracing" + +# (b) Your origin is upstream (NousResearch/hermes-agent): +git fetch git@github.com:kshitijk4poor/hermes-agent.git \ + feat/langfuse_tracing:refs/remotes/fork/feat/langfuse_tracing +PLUGIN_REF="fork/feat/langfuse_tracing" + +# ── 3. Determine your plugin directory ──────────────────────────────── +# Hermes loads user plugins from $HERMES_HOME/plugins/. +# HERMES_HOME defaults to ~/.hermes for the default profile. +# If you use `hermes -p <name>`, it becomes ~/.hermes/profiles/<name>/. +# The CLI sets HERMES_HOME internally — it may not be in your shell env. + +# Default profile: +PLUGIN_DIR="$HOME/.hermes/plugins/langfuse_tracing" + +# Named profile (uncomment and replace <name>): +# PLUGIN_DIR="$HOME/.hermes/profiles/<name>/plugins/langfuse_tracing" + +# ── 4.
Install the plugin ──────────────────────────────────────────── +mkdir -p "$PLUGIN_DIR" +git show "$PLUGIN_REF:.hermes/plugins/langfuse_tracing/__init__.py" \ + > "$PLUGIN_DIR/__init__.py" +git show "$PLUGIN_REF:.hermes/plugins/langfuse_tracing/plugin.yaml" \ + > "$PLUGIN_DIR/plugin.yaml" + +# ── 5. Set credentials ─────────────────────────────────────────────── +# Add these to your shell profile (~/.zshrc, ~/.bashrc, etc.) or .env. +# Tracing is completely dormant without them — no errors, no network calls. +export HERMES_LANGFUSE_ENABLED=true +export HERMES_LANGFUSE_PUBLIC_KEY=pk-lf-... +export HERMES_LANGFUSE_SECRET_KEY=sk-lf-... + +# ── 6. Verify ───────────────────────────────────────────────────────── +# Start a NEW terminal / hermes process (plugins load at startup only). +hermes plugins list # should show langfuse_tracing: enabled +HERMES_LANGFUSE_DEBUG=true hermes chat -q "hello" +# Look for: "Langfuse tracing: started trace ..." in stderr +``` + +That's it. The plugin is outside the repo tree, so `git pull upstream main` +will never touch it. + +--- + +## Updating hermes without breaking tracing + +The plugin hooks into hermes via the standard plugin system and uses `**_` in +every hook signature to absorb new kwargs. Per-API-call tracing uses +`pre_api_request` / `post_api_request` (not `pre_llm_call` / `post_llm_call`, which +are once per user turn). Those hooks receive **summary fields only** (message +counts, tool counts, token usage dict, etc.) — not full `messages`, `tools`, or +raw provider `response` objects — so keep span metadata small and the contract +stable. + +This means: + +```bash +# Just pull upstream as usual +git fetch upstream +git merge upstream/main +# or: git pull upstream main +``` + +Nothing else is needed. The plugin at `$PLUGIN_DIR` is not inside the repo, +so there are no merge conflicts. 
+ +### Updating the plugin itself + +When the plugin code on `feat/langfuse_tracing` is updated: + +```bash +git fetch origin feat/langfuse_tracing # or the fork fetch from step 2b +git show "$PLUGIN_REF:.hermes/plugins/langfuse_tracing/__init__.py" \ + > "$PLUGIN_DIR/__init__.py" +git show "$PLUGIN_REF:.hermes/plugins/langfuse_tracing/plugin.yaml" \ + > "$PLUGIN_DIR/plugin.yaml" +# Restart hermes to pick up changes +``` + +--- + +## Alternative: symlink for plugin development + +If you're actively editing the plugin and want it version-controlled separately: + +```bash +# Create a standalone plugin repo +mkdir -p ~/Projects/hermes-langfuse-plugin/langfuse_tracing +git show "$PLUGIN_REF:.hermes/plugins/langfuse_tracing/__init__.py" \ + > ~/Projects/hermes-langfuse-plugin/langfuse_tracing/__init__.py +git show "$PLUGIN_REF:.hermes/plugins/langfuse_tracing/plugin.yaml" \ + > ~/Projects/hermes-langfuse-plugin/langfuse_tracing/plugin.yaml +cd ~/Projects/hermes-langfuse-plugin && git init && git add -A && git commit -m "init" + +# Symlink into hermes plugin dir (remove existing dir/link first) +rm -rf "$PLUGIN_DIR" +ln -s ~/Projects/hermes-langfuse-plugin/langfuse_tracing "$PLUGIN_DIR" +``` + +Edits to `~/Projects/hermes-langfuse-plugin/langfuse_tracing/` take effect on +next hermes restart. Upstream hermes updates are still conflict-free. + +--- + +## Environment variables reference + +All variables are optional. Tracing does nothing unless `ENABLED` + both keys are set. + +| Variable | Required | Default | Notes | +|----------|----------|---------|-------| +| `HERMES_LANGFUSE_ENABLED` | yes | `false` | Must be `true`/`1`/`yes`/`on` | +| `HERMES_LANGFUSE_PUBLIC_KEY` | yes | — | Langfuse project public key | +| `HERMES_LANGFUSE_SECRET_KEY` | yes | — | Langfuse project secret key | +| `HERMES_LANGFUSE_BASE_URL` | no | `https://cloud.langfuse.com` | Self-hosted Langfuse URL | +| `HERMES_LANGFUSE_ENV` | no | — | Environment tag (e.g. 
`development`) | +| `HERMES_LANGFUSE_RELEASE` | no | — | Release tag | +| `HERMES_LANGFUSE_SAMPLE_RATE` | no | `1.0` | Float 0.0-1.0 | +| `HERMES_LANGFUSE_MAX_CHARS` | no | `12000` | Max chars per traced value | +| `HERMES_LANGFUSE_DEBUG` | no | `false` | Verbose logging to stderr | + +Each variable also accepts `CC_LANGFUSE_*` and bare `LANGFUSE_*` prefixes as +fallbacks (checked in order: `HERMES_` > `CC_` > bare). + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---------|-------|-----| +| `hermes plugins list` doesn't show `langfuse_tracing` | Plugin files not in the right dir | Check `$PLUGIN_DIR` matches your profile. Must contain both `__init__.py` and `plugin.yaml`. | +| Listed as `disabled` | In `plugins.disabled` in config.yaml | Run `hermes plugins enable langfuse_tracing` | +| No trace output with `HERMES_LANGFUSE_DEBUG=true` | Plugin loaded but dormant | Verify all 3 required env vars are set and exported | +| `"Could not initialize Langfuse client: ..."` | Bad credentials or unreachable server | Check public/secret keys; check base URL if self-hosted | +| Traces appear but background reviews aren't tagged | `feat/turn-type-hooks` not merged upstream | Plugin still works — `turn_type` defaults to `"user"`. Background reviews just won't be filterable until the upstream PR lands. | +| Plugin works in `hermes` but not `hermes -p coder` | Profile-scoped plugin dirs | Install plugin into `~/.hermes/profiles/coder/plugins/langfuse_tracing/` | + +--- + +## Disabling tracing + +Three options, from least to most permanent: + +1. **Unset env vars** — unset `HERMES_LANGFUSE_ENABLED`. Plugin loads but does nothing. +2. **CLI toggle** — `hermes plugins disable langfuse_tracing`. Plugin is skipped at startup. +3. **Remove files** — `rm -rf "$PLUGIN_DIR"`. 
+ +--- + +## What gets traced + +Each user turn becomes a root trace with nested child observations: + +``` +Hermes turn (or "Hermes background review") + |-- LLM call 0 (generation — with usage/cost) + |-- Tool: search_files (tool — with parsed JSON output) + |-- Tool: read_file (tool — head/tail preview, not raw content) + |-- LLM call 1 (generation) + \-- ... +``` + +Root trace metadata: `source`, `task_id`, `session_id`, `platform`, `provider`, +`model`, `api_mode`, `turn_type`. + +Tags: `hermes`, `langfuse`, plus `background_review` for auto-generated passes. + +Data normalization applied: +- Tool result JSON strings parsed into dicts +- Trailing `[Hint: ...]` extracted into `_hint` key +- `read_file` content replaced with head/tail line preview +- `base64_content` omitted (replaced with length) +- Usage/cost extracted when `agent.usage_pricing` is available + +--- + +## Running tests + +Tests live on the fork branch only — not on upstream or `main`. + +```bash +git checkout feat/langfuse_tracing +source .venv/bin/activate +python -m pytest tests/test_langfuse_tracing_plugin.py -q +``` + +12 tests covering payload parsing, observation nesting, tool call aggregation, +and `turn_type` propagation. No credentials or network access needed. 
+ +--- + +## Project history + +### Branches + +| Branch | Remote | Purpose | +|--------|--------|---------| +| `feat/turn-type-hooks` | `origin` (fork) | Upstream PR: `turn_type` hook plumbing in `run_agent.py` + `model_tools.py` | +| `feat/langfuse_tracing` | `origin` (fork) | Plugin code, tests, optional skill, skills hub changes | + +Fork remote: `git@github.com:kshitijk4poor/hermes-agent.git` +Upstream remote: `https://github.com/NousResearch/hermes-agent.git` + +### Commit log (chronological) + +| Date | Commit | Description | +|------|--------|-------------| +| 2026-03-28 | `b0a64856` | Initial plugin + hook emission patches + langfuse dependency | +| 2026-03-28 | `e691abda` | Parse JSON tool payloads into structured data | +| 2026-03-28 | `00dbff19` | Handle trailing `[Hint: ...]` after JSON in tool outputs | +| 2026-03-28 | `fd54a008` | Fix child observation nesting (use parent span API) | +| 2026-03-28 | `8752aed1` | Format read_file traces as head/tail previews | +| 2026-03-28 | `93f9c338` | Aggregate tool calls onto root trace output | +| 2026-03-29 | `dd714b2a` | Optional skill installer + skills hub enhancements | +| 2026-03-29 | `4b2f865e` | Distinguish background review traces via `turn_type` | +| 2026-03-29 | `aef4b44d` | Upstream-clean `turn_type` hook plumbing (2 files only) | + +### File inventory + +**Plugin** (`$HERMES_HOME/plugins/langfuse_tracing/`): +`__init__.py` (hook handlers + `register()`), `plugin.yaml` (manifest) + +**Upstream PR** (`feat/turn-type-hooks`): +`run_agent.py` (+`_turn_type` attr, hook propagation), `model_tools.py` (+`turn_type` param) + +**Fork branch** (`feat/langfuse_tracing`): +`.hermes/plugins/langfuse_tracing/` (plugin source), +`optional-skills/observability/` (installer skill), +`tools/skills_hub.py` + `hermes_cli/skills_hub.py` (hub enhancements), +`tests/test_langfuse_tracing_plugin.py` + `tests/tools/test_skills_hub.py` (tests) + +### Known limitations + +1. 
def _usage_summary_for_api_request_hook(self, response: Any) -> Optional[Dict[str, Any]]:
    """Build the token-usage summary handed to ``post_api_request`` plugins.

    Plugins get plain token buckets instead of the raw provider ``response``
    object, which keeps the hook payload small.

    Args:
        response: Provider response object, or ``None``. Only its ``usage``
            attribute is read.

    Returns:
        A dict with the normalized usage fields (``raw_usage`` removed) plus
        ``prompt_tokens`` and ``total_tokens``; ``None`` when the response
        carries no usage data or the usage could not be normalized.
    """
    raw_usage = getattr(response, "usage", None)
    if not raw_usage:
        return None

    # This helper is evaluated as a keyword argument of the hook call, which
    # sits in a blanket ``try/except``.  Letting an exception escape here would
    # silently cancel the whole ``post_api_request`` dispatch, so degrade to
    # "no usage available" and leave a debug breadcrumb instead.
    try:
        from dataclasses import asdict

        from agent.usage_pricing import normalize_usage

        cu = normalize_usage(raw_usage, provider=self.provider, api_mode=self.api_mode)
        summary = asdict(cu)
        # Plugins must never see the provider-specific payload.
        summary.pop("raw_usage", None)
        # Read these from the object rather than relying on asdict() output.
        # NOTE(review): presumably they are derived properties — confirm.
        summary["prompt_tokens"] = cu.prompt_tokens
        summary["total_tokens"] = cu.total_tokens
    except Exception:
        import logging

        logging.getLogger(__name__).debug(
            "Could not build usage summary for post_api_request hook",
            exc_info=True,
        )
        return None
    return summary
_invoke_hook + _invoke_hook( + "pre_api_request", task_id=effective_task_id, session_id=self.session_id or "", platform=self.platform or "", @@ -7292,14 +7310,16 @@ class AIAgent: base_url=self.base_url, api_mode=self.api_mode, api_call_count=api_call_count, - messages=api_messages, + message_count=len(api_messages), + tool_count=len(self.tools or []), + approx_input_tokens=approx_tokens, + request_char_count=total_chars, max_tokens=self.max_tokens, - tools=self.tools or [], ) except Exception: pass - if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}: + if env_var_enabled("HERMES_DUMP_REQUESTS"): self._dump_api_request_debug(api_kwargs, reason="preflight") # Always prefer the streaming path — even without stream @@ -8386,9 +8406,11 @@ class AIAgent: assistant_message.content = str(raw) try: - from hermes_cli.plugins import invoke_hook - invoke_hook( - "post_llm_request", + from hermes_cli.plugins import invoke_hook as _invoke_hook + _assistant_tool_calls = getattr(assistant_message, "tool_calls", None) or [] + _assistant_text = assistant_message.content or "" + _invoke_hook( + "post_api_request", task_id=effective_task_id, session_id=self.session_id or "", platform=self.platform or "", @@ -8399,9 +8421,11 @@ class AIAgent: api_call_count=api_call_count, api_duration=api_duration, finish_reason=finish_reason, - messages=api_messages, - response=response, - assistant_message=assistant_message, + message_count=len(api_messages), + response_model=getattr(response, "model", None), + usage=self._usage_summary_for_api_request_hook(response), + assistant_content_chars=len(_assistant_text), + assistant_tool_call_count=len(_assistant_tool_calls), ) except Exception: pass diff --git a/scripts/langfuse_smoketest.py b/scripts/langfuse_smoketest.py new file mode 100644 index 0000000000..c298a3a02a --- /dev/null +++ b/scripts/langfuse_smoketest.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 +"""Verify Langfuse credentials and that the user 
# Used when no *_LANGFUSE_BASE_URL variable is set (Langfuse Cloud).
_DEFAULT_LANGFUSE_BASE_URL = "https://cloud.langfuse.com"

# Lookup precedence for every Langfuse setting, matching the documented
# order: HERMES_ first, then CC_, then the bare SDK variable names.
_LANGFUSE_ENV_PREFIXES = ("HERMES_LANGFUSE_", "CC_LANGFUSE_", "LANGFUSE_")


def _repo_root() -> Path:
    """Return the repository root, i.e. the parent of this script's directory."""
    return Path(__file__).resolve().parents[1]


def _pick(*keys: str) -> str:
    """Return the first non-blank value among the environment variables *keys*.

    Values are stripped of surrounding whitespace; ``""`` is returned when
    none of the variables holds a non-blank value.
    """
    for k in keys:
        v = os.getenv(k, "").strip()
        if v:
            return v
    return ""


def _langfuse_setting(suffix: str) -> str:
    """Resolve one Langfuse setting (e.g. ``"PUBLIC_KEY"``) across all prefixes."""
    return _pick(*(prefix + suffix for prefix in _LANGFUSE_ENV_PREFIXES))


def _langfuse_credentials() -> tuple[str, str, str]:
    """Return ``(public_key, secret_key, base_url)`` from the environment.

    Keys may be empty strings; the base URL falls back to Langfuse Cloud so
    every check in this script targets the same server.
    """
    base = _langfuse_setting("BASE_URL") or _DEFAULT_LANGFUSE_BASE_URL
    return _langfuse_setting("PUBLIC_KEY"), _langfuse_setting("SECRET_KEY"), base


def _hermes_home() -> Path:
    """Return the active Hermes home directory.

    Uses ``hermes_constants.get_hermes_home()`` (the same source
    ``_load_hermes_env`` uses) and falls back to ``~/.hermes`` when the repo
    is not importable.
    """
    try:
        from hermes_constants import get_hermes_home
    except ImportError:
        return Path.home() / ".hermes"
    return Path(get_hermes_home())


def _load_hermes_env() -> None:
    """Put the repo on ``sys.path`` and load the Hermes and repo ``.env`` files."""
    repo = _repo_root()
    if str(repo) not in sys.path:
        sys.path.insert(0, str(repo))
    from hermes_cli.env_loader import load_hermes_dotenv
    from hermes_constants import get_hermes_home

    load_hermes_dotenv(hermes_home=get_hermes_home(), project_env=repo / ".env")


def _sdk_smoke() -> str:
    """Authenticate with the Langfuse SDK and emit a two-span test trace.

    Returns:
        The id of the emitted trace.

    Exits the process with status 2 when keys are missing and status 1 when
    ``auth_check()`` fails.
    """
    from langfuse import Langfuse

    pk, sk, base = _langfuse_credentials()
    if not pk or not sk:
        print("ERROR: set HERMES_LANGFUSE_PUBLIC_KEY and HERMES_LANGFUSE_SECRET_KEY (or LANGFUSE_* aliases).")
        sys.exit(2)

    lf = Langfuse(public_key=pk, secret_key=sk, base_url=base)
    if not lf.auth_check():
        print("ERROR: Langfuse auth_check() returned False.")
        sys.exit(1)

    trace_id = lf.create_trace_id(seed="hermes-langfuse-smoketest")
    root = lf.start_observation(
        trace_context={"trace_id": trace_id},
        name="Hermes langfuse_smoketest (SDK)",
        as_type="chain",
        input={"check": "sdk"},
        metadata={"source": "scripts/langfuse_smoketest.py"},
    )
    child = root.start_observation(
        name="sub-span",
        as_type="generation",
        input={"ping": True},
        model="smoke/test",
    )
    child.update(output={"pong": True})
    child.end()
    root.end()
    lf.flush()
    try:
        url = lf.get_trace_url(trace_id=trace_id)
    except Exception:
        # Fall back to a hand-built link when the SDK cannot produce one.
        url = f"{base.rstrip('/')}/traces/{trace_id}"
    print("SDK smoke: OK")
    print("  trace_id:", trace_id)
    print("  url:", url)
    return trace_id


def _plugin_smoke() -> None:
    """Load the user-installed plugin and drive one full hook sequence.

    The plugin is looked up under the active Hermes home so named profiles
    are honoured.  Prints ``SKIP`` and returns when the plugin file is absent
    or the plugin reports no Langfuse client; exits with status 1 when the
    module spec cannot be created.
    """
    plugin_path = _hermes_home() / "plugins" / "langfuse_tracing" / "__init__.py"
    if not plugin_path.is_file():
        print("SKIP plugin smoke: no file at", plugin_path)
        return

    spec = importlib.util.spec_from_file_location("langfuse_tracing_smoke", plugin_path)
    if spec is None or spec.loader is None:
        print("ERROR: cannot load plugin module spec")
        sys.exit(1)
    mod = importlib.util.module_from_spec(spec)
    sys.modules["langfuse_tracing_smoke"] = mod
    spec.loader.exec_module(mod)

    # Start from a clean slate: drop recorded trace state and the cached client.
    mod._TRACE_STATE.clear()
    mod._LANGFUSE_CLIENT = None

    session_id = f"smoke_sess_{uuid.uuid4().hex[:8]}"
    effective_task_id = str(uuid.uuid4())
    user_msg = "Langfuse plugin smoketest message."

    # Same hook order as a real turn: turn start, one API request, turn end.
    mod.on_pre_llm_call(
        session_id=session_id,
        user_message=user_msg,
        conversation_history=[],
        model="smoke/model",
        platform="cli",
    )
    mod.on_pre_api_request(
        task_id=effective_task_id,
        session_id=session_id,
        platform="cli",
        model="smoke/model",
        provider="test",
        base_url="http://localhost",
        api_mode="chat_completions",
        api_call_count=1,
        message_count=1,
        tool_count=0,
        approx_input_tokens=10,
        request_char_count=40,
        max_tokens=256,
    )
    mod.on_post_api_request(
        task_id=effective_task_id,
        session_id=session_id,
        provider="test",
        base_url="http://localhost",
        api_mode="chat_completions",
        model="smoke/model",
        api_call_count=1,
        api_duration=0.01,
        finish_reason="stop",
        usage={
            "input_tokens": 5,
            "output_tokens": 5,
            "total_tokens": 10,
            "reasoning_tokens": 0,
            "cache_read_tokens": 0,
            "cache_write_tokens": 0,
        },
        assistant_content_chars=4,
        assistant_tool_call_count=0,
        response_model="smoke/model",
    )
    mod.on_post_llm_call(
        session_id=session_id,
        user_message=user_msg,
        assistant_response="pong",
        conversation_history=[],
        model="smoke/model",
        platform="cli",
    )

    client = mod._get_langfuse()
    if client is None:
        print("SKIP plugin smoke: Langfuse disabled or keys missing (_get_langfuse is None).")
        return
    client.flush()
    print("Plugin hook chain: OK (flushed)")
    print("  session_id:", session_id)


def _api_list_traces(limit: int = 2) -> None:
    """Print the most recent traces using the Langfuse public REST API.

    Best effort: returns silently when keys are missing and prints a one-line
    failure message on network, HTTP or decoding errors.

    Args:
        limit: Maximum number of traces to request.
    """
    pk, sk, base = _langfuse_credentials()
    if not pk or not sk:
        return
    base = base.rstrip("/")
    auth = base64.b64encode(f"{pk}:{sk}".encode()).decode()
    try:
        req = Request(
            f"{base}/api/public/traces?limit={limit}",
            headers={"Authorization": f"Basic {auth}"},
        )
        with urlopen(req, timeout=15) as resp:
            payload = json.loads(resp.read().decode())
    except (OSError, ValueError) as exc:
        # OSError covers URLError/HTTPError/timeouts; ValueError covers bad
        # URLs, undecodable bodies and invalid JSON.
        print("REST list traces: failed:", exc)
        return
    rows = (payload.get("data") if isinstance(payload, dict) else None) or []
    print(f"REST /api/public/traces?limit={limit}: {len(rows)} row(s)")
    for row in rows:
        name = row.get("name")
        tid = row.get("id")
        ts = row.get("timestamp")
        print(f"  - {ts} {name!r} id={tid}")


def main() -> None:
    """Run the SDK check, the optional plugin check, then list recent traces."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--no-plugin", action="store_true", help="Only run SDK smoke + REST list")
    args = parser.parse_args()

    _load_hermes_env()
    _sdk_smoke()
    if not args.no_plugin:
        _plugin_smoke()
    _api_list_traces(limit=3)
    print("Done.")


if __name__ == "__main__":
    main()
def _load_user_plugin():
    """Import the user-installed plugin from ``PLUGIN_INIT`` and cache it.

    The module is registered in ``sys.modules`` under a private name so that
    every test shares one instance.

    Returns:
        The loaded plugin module.

    Raises:
        RuntimeError: If no import spec can be created for ``PLUGIN_INIT``.
        Exception: Whatever the plugin raises while executing; in that case
            the partially initialised module is removed from ``sys.modules``.
    """
    name = "langfuse_tracing_user_plugin"
    cached = sys.modules.get(name)
    if cached is not None:
        return cached
    spec = importlib.util.spec_from_file_location(name, PLUGIN_INIT)
    if spec is None or spec.loader is None:
        raise RuntimeError("cannot load langfuse plugin")
    mod = importlib.util.module_from_spec(spec)
    sys.modules[name] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # Never leave a half-initialised module behind: later tests would pick
        # it up from the cache and fail with confusing attribute errors.
        sys.modules.pop(name, None)
        raise
    return mod


@needs_user_plugin
def test_langfuse_plugin_registers_api_request_hooks():
    """``register()`` must wire up the per-request hooks and ``pre_llm_call``."""
    mod = _load_user_plugin()
    ctx = MagicMock()
    ctx.manifest.name = "langfuse_tracing"
    mod.register(ctx)
    # First positional argument of each register_hook() call is the hook name.
    registered = [c[0][0] for c in ctx.register_hook.call_args_list]
    assert "pre_api_request" in registered
    assert "post_api_request" in registered
    assert "pre_llm_call" in registered


@needs_user_plugin
def test_pre_post_api_request_smoke_with_mock_langfuse():
    """A pre/post request pair must update and end the generation observation."""
    mod = _load_user_plugin()
    mod._TRACE_STATE.clear()

    gen_obs = MagicMock()
    root_obs = MagicMock()
    root_obs.start_observation.return_value = gen_obs

    client = MagicMock()
    client.create_trace_id.return_value = "trace-smoke-test"
    client.start_observation.return_value = root_obs

    try:
        with patch.object(mod, "_get_langfuse", return_value=client):
            mod.on_pre_api_request(
                task_id="t1",
                session_id="s1",
                platform="cli",
                model="test/model",
                provider="openrouter",
                base_url="https://openrouter.ai/api/v1",
                api_mode="chat_completions",
                api_call_count=1,
                message_count=3,
                tool_count=5,
                approx_input_tokens=100,
                request_char_count=400,
                max_tokens=4096,
            )
            mod.on_post_api_request(
                task_id="t1",
                session_id="s1",
                provider="openrouter",
                base_url="https://openrouter.ai/api/v1",
                api_mode="chat_completions",
                model="test/model",
                api_call_count=1,
                api_duration=0.05,
                finish_reason="stop",
                usage={
                    "input_tokens": 10,
                    "output_tokens": 20,
                    "total_tokens": 30,
                    "reasoning_tokens": 0,
                    "cache_read_tokens": 0,
                    "cache_write_tokens": 0,
                },
                assistant_content_chars=42,
                assistant_tool_call_count=0,
                response_model="test/model",
            )

        gen_obs.update.assert_called()
        gen_obs.end.assert_called()
    finally:
        # The plugin module is cached across tests; do not leak state that
        # points at this test's mocks.
        mod._TRACE_STATE.clear()
approx_input_tokens=100, + request_char_count=400, + max_tokens=8192, ) - assert results == [{"seen": 2}] + assert results == [{"seen": 2, "mc": 5, "tc": 3}] def test_invalid_hook_name_warns(self, tmp_path, monkeypatch, caplog): """Registering an unknown hook name logs a warning.""" diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 9ab12bf59e..2819454923 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1454,7 +1454,7 @@ class TestRunConversation: assert mock_handle_function_call.call_args.kwargs["tool_call_id"] == "c1" assert mock_handle_function_call.call_args.kwargs["session_id"] == agent.session_id - def test_request_scoped_llm_hooks_fire_for_each_api_call(self, agent): + def test_request_scoped_api_hooks_fire_for_each_api_call(self, agent): self._setup_agent(agent) tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1") resp1 = _mock_response(content="", finish_reason="tool_calls", tool_calls=[tc]) @@ -1477,13 +1477,15 @@ class TestRunConversation: result = agent.run_conversation("search something") assert result["final_response"] == "Done searching" - pre_request_calls = [kw for name, kw in hook_calls if name == "pre_llm_request"] - post_request_calls = [kw for name, kw in hook_calls if name == "post_llm_request"] + pre_request_calls = [kw for name, kw in hook_calls if name == "pre_api_request"] + post_request_calls = [kw for name, kw in hook_calls if name == "post_api_request"] assert len(pre_request_calls) == 2 assert len(post_request_calls) == 2 assert [call["api_call_count"] for call in pre_request_calls] == [1, 2] assert [call["api_call_count"] for call in post_request_calls] == [1, 2] assert all(call["session_id"] == agent.session_id for call in pre_request_calls) + assert all("message_count" in c and "messages" not in c for c in pre_request_calls) + assert all("usage" in c and "response" not in c for c in post_request_calls) def test_interrupt_breaks_loop(self, agent): self._setup_agent(agent)