mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-06 07:51:53 +00:00
tools(terminal): nudge homebrewed CI pollers at the tool surface (#33142)
Background processes whose command contains `gh pr view --json statusCheckRollup` or `gh pr checks | jq` now get a runtime hint in the result pointing at the canonical green-ci-policy snippets. The homebrew shape has caused at least seven silent CI-watcher failures in the past two weeks (#31329, #31448, #31695, #31709, #31745, #32264, #33131) — each one a different jq/awk/grep variation of the same fundamental problem (stdout buffering, jq null-key edge cases, conclusion-vs-status confusion, TTY-only banner grepping). The skill that documents this anti-pattern is excellent, but a skill only fires if the agent loads it. The tool surface fires on every misuse. This is the embed-footguns-in-tool-surface pattern from PR #31289 applied to a recurring failure mode that's outgrown skill-only enforcement. Detector is deliberately narrow — flags two specific shapes: 1. Any command containing `statusCheckRollup` (the JSON-API path — conclusion vs status field semantics keep burning us). 2. `gh pr view` / `gh pr checks` combined with `jq` (gh pr checks doesn't emit JSON, so any `| jq` here is confused intent; the canonical column-2 poller uses awk-on-tabs, not jq). Does NOT flag the blessed column-2 awk-on-tabs poller (which uses `awk -F"\t" "\==\"pending\""`) or the exit-code-driven `gh pr checks $PR >/dev/null` snippet. Hint composes with the existing background-without-notify_on_complete hint — both can fire on the same call. Each is independently actionable. Tests: - 4 new cases in tests/tools/test_notify_on_complete.py - test_homebrew_ci_poller_via_statusCheckRollup_emits_hint (positive) - test_homebrew_ci_poller_via_gh_pr_checks_piped_to_jq_emits_hint (positive) - test_canonical_column2_awk_poller_does_not_emit_homebrew_hint (negative) - test_canonical_gh_pr_checks_exit_code_loop_does_not_emit_hint (negative) - test_non_ci_background_command_does_not_emit_homebrew_hint (negative) - 30/30 passing (was 26)
This commit is contained in:
parent
a890389b69
commit
187cf0f257
2 changed files with 235 additions and 0 deletions
|
|
@ -503,3 +503,159 @@ def test_foreground_command_does_not_emit_hint(monkeypatch, tmp_path):
|
|||
assert "hint" not in result, (
|
||||
f"Foreground commands must not emit the background-silence hint, got: {result.get('hint')!r}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Homebrewed-CI-watcher hint
|
||||
#
|
||||
# Background processes whose command looks like a hand-rolled CI poller
|
||||
# (`gh pr view` / `gh pr checks` combined with jq/awk on stdout) get an
|
||||
# additional hint pointing at the canonical green-ci-policy snippet. The
|
||||
# homebrew shape has burned us repeatedly (May 2026 PRs #31329, #31448,
|
||||
# #31695, #31709, #31745, #32264, #33131) with stdout buffering, jq null
|
||||
# keys, conclusion-vs-status confusion, and TTY-only banner grepping —
|
||||
# none of which the canonical snippets suffer from. Fire on every detection;
|
||||
# false positives are cheap (~one read).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_homebrew_ci_poller_via_statusCheckRollup_emits_hint(monkeypatch, tmp_path):
|
||||
"""The canonical anti-pattern: jq pipeline parsing statusCheckRollup
|
||||
JSON. Tool must point the agent at the green-ci-policy skill snippet."""
|
||||
tt = _silent_bg_harness(monkeypatch, tmp_path)
|
||||
try:
|
||||
result = json.loads(
|
||||
tt.terminal_tool(
|
||||
command=(
|
||||
"PR=12345; while true; do "
|
||||
"status=$(gh pr view $PR --json statusCheckRollup "
|
||||
"--jq '[.statusCheckRollup[] | .conclusion] "
|
||||
"| group_by(.) | map({k:.[0],v:length}) | from_entries'); "
|
||||
"echo \"$status\"; sleep 30; done"
|
||||
),
|
||||
background=True,
|
||||
notify_on_complete=True,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
tt._active_environments.pop("default", None)
|
||||
tt._last_activity.pop("default", None)
|
||||
|
||||
hint = result.get("hint", "")
|
||||
assert hint, "Homebrew CI poller must emit a hint pointing at green-ci-policy"
|
||||
assert "green-ci-policy" in hint, (
|
||||
"Hint must name the canonical skill file so the agent can find the verbatim snippets"
|
||||
)
|
||||
# Naming exit-code-driven OR column-2 in the hint is what makes it actionable.
|
||||
assert "exit" in hint.lower() or "column-2" in hint.lower() or "tab" in hint.lower(), (
|
||||
"Hint must point at the canonical alternatives (exit-code or column-2)"
|
||||
)
|
||||
|
||||
|
||||
def test_homebrew_ci_poller_via_gh_pr_checks_piped_to_jq_emits_hint(monkeypatch, tmp_path):
|
||||
"""`gh pr checks` doesn't emit JSON, so piping it to jq is a confused-
|
||||
intent anti-pattern that produces silent failures (jq fails, loop
|
||||
keeps spinning with empty data)."""
|
||||
tt = _silent_bg_harness(monkeypatch, tmp_path)
|
||||
try:
|
||||
result = json.loads(
|
||||
tt.terminal_tool(
|
||||
command=(
|
||||
"PR=99; while true; do "
|
||||
"gh pr checks $PR | jq -R 'split(\"\\t\")[1]'; "
|
||||
"sleep 30; done"
|
||||
),
|
||||
background=True,
|
||||
notify_on_complete=True,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
tt._active_environments.pop("default", None)
|
||||
tt._last_activity.pop("default", None)
|
||||
|
||||
hint = result.get("hint", "")
|
||||
assert hint, "Homebrew `gh pr checks | jq` poller must emit a hint"
|
||||
assert "green-ci-policy" in hint
|
||||
|
||||
|
||||
def test_canonical_column2_awk_poller_does_not_emit_homebrew_hint(monkeypatch, tmp_path):
|
||||
"""The blessed column-2 awk-on-tabs poller from green-ci-policy is the
|
||||
PREFERRED pattern for sharded matrices. Must not be flagged as
|
||||
homebrew — the gating signal is statusCheckRollup or `gh pr checks
|
||||
| jq`, NOT awk on tabs."""
|
||||
tt = _silent_bg_harness(monkeypatch, tmp_path)
|
||||
try:
|
||||
result = json.loads(
|
||||
tt.terminal_tool(
|
||||
command=(
|
||||
"PR=1; while :; do "
|
||||
"out=$(gh pr checks $PR 2>&1); "
|
||||
"pending=$(echo \"$out\" | awk -F\"\\t\" \"\\$2==\\\"pending\\\"\" | wc -l); "
|
||||
"failed=$(echo \"$out\" | awk -F\"\\t\" \"\\$2==\\\"fail\\\"\" | wc -l); "
|
||||
"if [ \"$pending\" -eq 0 ]; then "
|
||||
"[ \"$failed\" -gt 0 ] && exit 1 || exit 0; "
|
||||
"fi; sleep 30; "
|
||||
"done"
|
||||
),
|
||||
background=True,
|
||||
notify_on_complete=True,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
tt._active_environments.pop("default", None)
|
||||
tt._last_activity.pop("default", None)
|
||||
|
||||
assert "hint" not in result, (
|
||||
f"Canonical column-2 awk poller must not be flagged as homebrew, got: {result.get('hint')!r}"
|
||||
)
|
||||
|
||||
|
||||
def test_canonical_gh_pr_checks_exit_code_loop_does_not_emit_hint(monkeypatch, tmp_path):
|
||||
"""The blessed exit-code-driven snippet from green-ci-policy is exactly
|
||||
what we want — no jq, no awk-on-stdout, gates the loop on exit code.
|
||||
Must not be flagged as a homebrew anti-pattern."""
|
||||
tt = _silent_bg_harness(monkeypatch, tmp_path)
|
||||
try:
|
||||
result = json.loads(
|
||||
tt.terminal_tool(
|
||||
command=(
|
||||
"PR=1; while :; do "
|
||||
"gh pr checks $PR >/dev/null 2>&1; rc=$?; "
|
||||
"case $rc in 0) exit 0;; 8) sleep 30;; *) exit 1;; esac; "
|
||||
"done"
|
||||
),
|
||||
background=True,
|
||||
notify_on_complete=True,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
tt._active_environments.pop("default", None)
|
||||
tt._last_activity.pop("default", None)
|
||||
|
||||
# No silent-process hint (we have notify_on_complete) AND no
|
||||
# homebrew-poller hint (no jq / awk pipeline parsing stdout).
|
||||
assert "hint" not in result, (
|
||||
f"Canonical exit-code-driven poller must not be flagged as homebrew, got: {result.get('hint')!r}"
|
||||
)
|
||||
|
||||
|
||||
def test_non_ci_background_command_does_not_emit_homebrew_hint(monkeypatch, tmp_path):
|
||||
"""A long-running task that happens to use awk for unrelated reasons
|
||||
must not be mistaken for a CI poller — the gating signal is the
|
||||
combination of `gh pr ...` AND a stdout parser."""
|
||||
tt = _silent_bg_harness(monkeypatch, tmp_path)
|
||||
try:
|
||||
result = json.loads(
|
||||
tt.terminal_tool(
|
||||
command="cat /var/log/syslog | awk '/error/ {print}' > /tmp/errs.log",
|
||||
background=True,
|
||||
notify_on_complete=True,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
tt._active_environments.pop("default", None)
|
||||
tt._last_activity.pop("default", None)
|
||||
|
||||
assert "hint" not in result, (
|
||||
f"Non-CI command using awk must not be flagged as homebrew CI poller, got: {result.get('hint')!r}"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1901,6 +1901,85 @@ def terminal_tool(
|
|||
"that never exit (servers, watchers, daemons)."
|
||||
)
|
||||
|
||||
# Nudge: homebrewed CI watcher built from `gh pr view`
|
||||
# `--json statusCheckRollup` or `gh pr checks` piped through
|
||||
# `jq` is the #1 cause of silent CI-watcher failures in
|
||||
# hermes-agent dev work. May 2026 PRs that surfaced this
|
||||
# exact failure mode: #31329, #31448, #31695, #31709, #31745,
|
||||
# #32264, #33131. Failure modes seen:
|
||||
# * `gh pr view --json statusCheckRollup --jq ...` with
|
||||
# `from_entries` choking on null `conclusion` keys, loop
|
||||
# silently exits with empty status, never terminates.
|
||||
# * `for i in $(seq 1 60); do ... 2>&1` block-buffered stdout
|
||||
# never flushed to background-process capture; SIGTERM
|
||||
# cuts the buffer before flush; `process(action='log')`
|
||||
# returns total_lines=0 forever.
|
||||
# * conclusion vs. status field confusion: filtering for
|
||||
# `PENDING` in `.conclusion` while in-progress checks have
|
||||
# empty conclusion → poller declares all-green while 18/23
|
||||
# checks still IN_PROGRESS.
|
||||
# * grepping for TTY-only banners ("All checks were
|
||||
# successful") that never appear when stdout is piped.
|
||||
# The canonical patterns in the green-ci-policy skill avoid
|
||||
# every one of these — drive the loop off exit codes or on
|
||||
# tab-separated `awk -F"\t" "$2==\"pending\""` (column 2).
|
||||
# The detector here is deliberately narrow: it flags the
|
||||
# statusCheckRollup JSON-API path and the `gh pr checks` +
|
||||
# jq combination, but NOT the canonical column-2 awk
|
||||
# poller (which uses awk on tabs, not as a generic
|
||||
# stdout parser). When we detect the homebrew shape, point
|
||||
# the agent at the canonical snippet rather than letting
|
||||
# it ship another broken poller.
|
||||
if background and command:
|
||||
_gh = ("gh pr view" in command or "gh pr checks" in command)
|
||||
_has_jq = (
|
||||
" jq " in command or "| jq" in command or "$(jq" in command
|
||||
)
|
||||
_bad_shape = (
|
||||
# The JSON-API anti-pattern. Even without jq, going
|
||||
# through `--json statusCheckRollup` + parsing puts
|
||||
# you in conclusion-vs-status field hell.
|
||||
"statusCheckRollup" in command
|
||||
# gh pr checks piped to jq is also wrong — `gh pr
|
||||
# checks` doesn't emit JSON, so any `| jq` here is
|
||||
# confused intent. The canonical column-2 poller
|
||||
# uses awk-on-tabs, not jq.
|
||||
or (_gh and _has_jq)
|
||||
)
|
||||
if _bad_shape:
|
||||
existing = result_data.get("hint", "")
|
||||
canonical_hint = (
|
||||
"This looks like a homebrewed CI poller built from "
|
||||
"`gh pr view --json statusCheckRollup` and/or "
|
||||
"`gh pr checks | jq`. That shape has burned us "
|
||||
"repeatedly in hermes-agent dev work (PRs #31329, "
|
||||
"#31448, #31695, #31709, #31745, #32264, #33131) — "
|
||||
"stdout buffering kills output capture, jq null-key "
|
||||
"edge cases silently exit the loop, conclusion-vs-"
|
||||
"status field confusion exits early with bogus "
|
||||
"all-green verdicts, TTY-only summary banners "
|
||||
"never appear when piped. Use the canonical "
|
||||
"snippets in the green-ci-policy skill instead: "
|
||||
"the exit-code-driven `gh pr checks $PR >/dev/null` "
|
||||
"(rc 0 = green, 8 = pending, else fail) for "
|
||||
"exit-on-first-fail behavior, or the column-2 "
|
||||
"awk-on-tabs poller "
|
||||
"(`awk -F\"\\t\" \"$2==\\\"pending\\\"\"`) for "
|
||||
"sharded matrices. Load skill_view("
|
||||
"name='github/hermes-agent-dev', "
|
||||
"file_path='references/green-ci-policy.md') for "
|
||||
"the verbatim snippets. If you must roll a custom "
|
||||
"loop with rich structured output, write each tick "
|
||||
"to a known file (`tee -a /tmp/ci.log`) and rely "
|
||||
"on `process(action='log')` to read THAT file — "
|
||||
"do not rely on background-process stdout capture "
|
||||
"for line-buffered shell loops."
|
||||
)
|
||||
result_data["hint"] = (
|
||||
existing + "\n\n" + canonical_hint if existing
|
||||
else canonical_hint
|
||||
)
|
||||
|
||||
# Populate routing metadata on the session so that
|
||||
# watch-pattern and completion notifications can be
|
||||
# routed back to the correct chat/thread.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue