diff --git a/run_agent.py b/run_agent.py index a6ea2b1e72..720bc19dcb 100644 --- a/run_agent.py +++ b/run_agent.py @@ -10421,6 +10421,15 @@ class AIAgent: from hermes_logging import set_session_context set_session_context(self.session_id) + # Bind the skill write-origin ContextVar for this thread so tool + # handlers (e.g. skill_manage create) can tell whether they are + # running inside the background self-improvement review fork vs. + # a foreground user-directed turn. Set at the top of each call; + # the review fork runs on its own thread with a fresh context, + # so the foreground value here does not leak into it. + from tools.skill_provenance import set_current_write_origin + set_current_write_origin(getattr(self, "_memory_write_origin", "assistant_tool")) + # If the previous turn activated fallback, restore the primary # runtime so this turn gets a fresh attempt with the preferred model. # No-op when _fallback_activated is False (gateway, first turn, etc.). diff --git a/tests/hermes_cli/test_curator_status.py b/tests/hermes_cli/test_curator_status.py index 3be5862592..b4c3548c42 100644 --- a/tests/hermes_cli/test_curator_status.py +++ b/tests/hermes_cli/test_curator_status.py @@ -114,6 +114,12 @@ def test_status_shows_most_and_least_used_sections(curator_status_env): env["make_skill"]("top-dog") env["make_skill"]("middling") env["make_skill"]("never-used") + # Mark all three as agent-created so they enter the curator's catalog. + # Under the provenance-marker semantics, skills must be explicitly opted + # into curator management (normally via the background-review fork when + # it creates a skill through skill_manage). + for n in ("top-dog", "middling", "never-used"): + env["skill_usage"].mark_agent_created(n) # Bump use_count differentially. All three counters (use/view/patch) feed # into activity_count, so bumping use alone is enough to make activity @@ -150,7 +156,9 @@ def test_status_hides_most_active_when_all_zero(curator_status_env): env = curator_status_env env["make_skill"]("a") env["make_skill"]("b") - # No bumps. + # Mark both as agent-created so the catalog lists them. No bumps. + env["skill_usage"].mark_agent_created("a") + env["skill_usage"].mark_agent_created("b") out = _capture_status(env["curator_cli"]) diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py index 934215d945..e24e19dea1 100644 --- a/tests/tools/test_skill_manager_tool.py +++ b/tests/tools/test_skill_manager_tool.py @@ -531,13 +531,41 @@ class TestSkillManageDispatcher: assert result["success"] is False def test_full_create_via_dispatcher(self, tmp_path): + """Foreground create does NOT mark the skill as agent-created. + + Skills created by user-directed foreground turns belong to the user; + only the background self-improvement review fork should mark its + own sediment as agent-created (so the curator can later consolidate + or prune it). + """ with _skill_dir(tmp_path): raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT) from tools.skill_usage import load_usage usage = load_usage() result = json.loads(raw) assert result["success"] is True - assert usage["test-skill"]["created_by"] == "agent" + # No provenance marker on a foreground create — record either missing + # entirely (telemetry best-effort) or present with created_by unset. + rec = usage.get("test-skill") or {} + assert rec.get("created_by") in (None, "", False) + + def test_create_from_background_review_marks_agent_created(self, tmp_path): + """Background-review fork creates ARE marked as agent-created.""" + from tools.skill_provenance import set_current_write_origin, BACKGROUND_REVIEW + token = set_current_write_origin(BACKGROUND_REVIEW) + try: + with _skill_dir(tmp_path): + raw = skill_manage( + action="create", name="review-sediment", content=VALID_SKILL_CONTENT + ) + from tools.skill_usage import load_usage + usage = load_usage() + finally: + from tools.skill_provenance import reset_current_write_origin + reset_current_write_origin(token) + result = json.loads(raw) + assert result["success"] is True + assert usage["review-sediment"]["created_by"] == "agent" def test_delete_via_dispatcher_threads_absorbed_into(self, tmp_path): # Dispatcher must plumb absorbed_into through to _delete_skill so the diff --git a/tests/tools/test_skill_provenance.py b/tests/tools/test_skill_provenance.py new file mode 100644 index 0000000000..77f505bb86 --- /dev/null +++ b/tests/tools/test_skill_provenance.py @@ -0,0 +1,102 @@ +"""Tests for tools/skill_provenance.py — write-origin ContextVar.""" + +import contextvars + +import pytest + + +def test_default_origin_is_foreground(): + from tools.skill_provenance import get_current_write_origin + # In a fresh ContextVar context, default kicks in. + ctx = contextvars.copy_context() + origin = ctx.run(get_current_write_origin) + assert origin == "foreground" + + +def test_set_and_get_origin(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + token = set_current_write_origin("background_review") + try: + assert get_current_write_origin() == "background_review" + finally: + reset_current_write_origin(token) + + +def test_reset_restores_prior_origin(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + outer = set_current_write_origin("assistant_tool") + try: + inner = set_current_write_origin("background_review") + try: + assert get_current_write_origin() == "background_review" + finally: + reset_current_write_origin(inner) + assert get_current_write_origin() == "assistant_tool" + finally: + reset_current_write_origin(outer) + + +def test_is_background_review_truthy_only_for_review(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + is_background_review, + BACKGROUND_REVIEW, + ) + for origin, expected in ( + ("foreground", False), + ("assistant_tool", False), + ("random_other_value", False), + (BACKGROUND_REVIEW, True), + ): + token = set_current_write_origin(origin) + try: + assert is_background_review() is expected, ( + f"is_background_review() wrong for origin={origin!r}" + ) + finally: + reset_current_write_origin(token) + + +def test_empty_origin_falls_back_to_foreground(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + token = set_current_write_origin("") + try: + # Empty is coerced to "foreground" at the set() boundary. + assert get_current_write_origin() == "foreground" + finally: + reset_current_write_origin(token) + + +def test_context_isolation_between_copies(): + """ContextVar scoping: modifications in one copy do not leak out.""" + from tools.skill_provenance import ( + set_current_write_origin, + get_current_write_origin, + BACKGROUND_REVIEW, + ) + + # Start at the module default. + original = get_current_write_origin() + + def _run_in_copy(): + set_current_write_origin(BACKGROUND_REVIEW) + return get_current_write_origin() + + ctx = contextvars.copy_context() + inside = ctx.run(_run_in_copy) + assert inside == BACKGROUND_REVIEW + # Parent context unaffected. + assert get_current_write_origin() == original diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index e7d264de67..58c3fe3d2d 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -784,11 +784,16 @@ def skill_manage( pass # Curator telemetry: bump patch_count on edit/patch/write_file (the actions # that mutate an existing skill's guidance), drop the record on delete. - # Best-effort; telemetry failures never break the tool. + # Only mark a skill as agent-created when the background self-improvement + # review fork creates it — foreground `skill_manage(create)` calls are + # user-directed, and those skills belong to the user (the curator must + # not touch them). Best-effort; telemetry failures never break the tool. try: from tools.skill_usage import bump_patch, forget, mark_agent_created + from tools.skill_provenance import is_background_review if action == "create": - mark_agent_created(name) + if is_background_review(): + mark_agent_created(name) elif action in ("patch", "edit", "write_file", "remove_file"): bump_patch(name) elif action == "delete": diff --git a/tools/skill_provenance.py b/tools/skill_provenance.py new file mode 100644 index 0000000000..9f43efc3fc --- /dev/null +++ b/tools/skill_provenance.py @@ -0,0 +1,78 @@ +"""Skill write-origin provenance — ContextVar for distinguishing agent-sediment skill writes from foreground user-directed writes. + +The curator only consolidates/prunes skills it autonomously created via the +background self-improvement review fork. Skills a user asks a foreground +agent to write belong to the user and must never be auto-curated. + +This module exposes a ContextVar that run_agent.py sets before each tool +loop so tool handlers (e.g. skill_manage create) can check whether they +are executing inside the background-review fork. + +The signal piggybacks on AIAgent._memory_write_origin, which is already +set to "background_review" for review-fork instances (see +_spawn_background_review in run_agent.py) and defaults to "assistant_tool" +for normal (foreground) agents. + +Usage: + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + + token = set_current_write_origin("background_review") + try: + ... # tool runs here + finally: + reset_current_write_origin(token) + + # inside a tool: + if get_current_write_origin() == "background_review": + mark_agent_created(skill_name) +""" + +import contextvars + + +_write_origin: contextvars.ContextVar[str] = contextvars.ContextVar( + "skill_write_origin", + default="foreground", +) + +# The sentinel value the background review fork uses; mirrors +# run_agent.py's AIAgent._memory_write_origin override in +# _spawn_background_review(). +BACKGROUND_REVIEW = "background_review" + + +def set_current_write_origin(origin: str) -> contextvars.Token[str]: + """Bind the active write origin to the current context. + + Returns a Token the caller must pass to reset_current_write_origin + in a finally block. + """ + return _write_origin.set(origin or "foreground") + + +def reset_current_write_origin(token: contextvars.Token[str]) -> None: + """Restore the prior write origin context.""" + _write_origin.reset(token) + + +def get_current_write_origin() -> str: + """Return the active write origin. + + Default: "foreground" — any tool call made by a regular (non-review) + agent, from the CLI, the gateway, cron, or a subagent. + + "background_review" — the self-improvement review fork; only skills + created under this origin should be marked agent-created for curator + management. + """ + return _write_origin.get() + + +def is_background_review() -> bool: + """Convenience: True iff the current write origin is the background + review fork.""" + return get_current_write_origin() == BACKGROUND_REVIEW