mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-10 03:22:05 +00:00
* feat(skills): watchers skill — poll RSS / HTTP JSON / GitHub via cron no-agent Ships three reusable polling scripts plus a shared watermark helper as an optional skill. Users wire them into the existing cron (no_agent=True) mode rather than learning a new subsystem. Supersedes the closed PR #21497 (parallel watcher subsystem). Same value, zero new core surface. ## What ships - optional-skills/devops/watchers/SKILL.md: pattern + three example cron commands - optional-skills/devops/watchers/scripts/_watermark.py: shared helper (atomic state writes, bounded ID set, first-run baseline) - optional-skills/devops/watchers/scripts/watch_rss.py: RSS 2.0 + Atom - optional-skills/devops/watchers/scripts/watch_http_json.py: any JSON endpoint with configurable id_field / items_path / headers - optional-skills/devops/watchers/scripts/watch_github.py: issues / pulls / releases / commits (uses GITHUB_TOKEN if present) ## Invariants enforced by the shared helper - First run records baseline, emits nothing (never replays existing feed) - Watermark file is <state_dir>/<name>.json, atomic replace on write - Bounded to 500 IDs (configurable) - Empty stdout when no new items — cron treats that as silent delivery ## Validation - watch_rss.py against news.ycombinator.com/rss first run → empty stdout, watermark populated - Removed one seen-id, second run → emitted exactly that item - No DeprecationWarnings (ET element truth-value footgun dodged explicitly) End-user pattern: 'hermes cron create my-feed --schedule "*/15 * * * *" --no-agent --script $HERMES_HOME/skills/devops/watchers/scripts/watch_rss.py --script-args "--name hn --url https://news.ycombinator.com/rss" --deliver telegram' * docs(skills/watchers): tighten description to match peer optional skills * docs(skills/watchers): align frontmatter + structure with peer optional skills * docs(skills/watchers): gate to linux/macos (shell syntax in examples)
131 lines
4.5 KiB
Python
Executable file
131 lines
4.5 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""Watch any JSON endpoint that returns a list of objects; dedup by ID field.
|
|
|
|
Usage (via cron with --no-agent):
|
|
|
|
hermes cron create api-events \\
|
|
--schedule "*/1 * * * *" --no-agent \\
|
|
--script "$HERMES_HOME/skills/devops/watchers/scripts/watch_http_json.py" \\
|
|
--script-args "--name api --url https://api.example.com/events \\
|
|
--id-field event_id --items-path data.events"
|
|
|
|
The response can be:
|
|
- a top-level JSON list (default), or
|
|
- a JSON object with a dotted ``--items-path`` pointing to the list.
|
|
|
|
Each item is deduped by ``--id-field`` (default "id").
|
|
|
|
Optional ``--header KEY:VALUE`` flags pass HTTP headers (repeatable).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
import urllib.error
|
|
import urllib.request
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
from _watermark import Watermark, format_items_as_markdown # type: ignore
|
|
|
|
|
|
def _dig(obj, path: str):
|
|
"""Dotted-path lookup: _dig({'a':{'b':[1,2]}}, 'a.b') → [1,2]."""
|
|
if not path:
|
|
return obj
|
|
cur = obj
|
|
for part in path.split("."):
|
|
if isinstance(cur, dict) and part in cur:
|
|
cur = cur[part]
|
|
else:
|
|
return None
|
|
return cur
|
|
|
|
|
|
def _parse_header(s: str):
|
|
if ":" not in s:
|
|
raise argparse.ArgumentTypeError(
|
|
f"--header expects 'KEY: VALUE' (got {s!r})"
|
|
)
|
|
k, v = s.split(":", 1)
|
|
return (k.strip(), v.strip())
|
|
|
|
|
|
def main() -> int:
|
|
p = argparse.ArgumentParser(description="Poll a JSON endpoint.")
|
|
p.add_argument("--name", required=True, help="Watcher name (used for state file)")
|
|
p.add_argument("--url", required=True, help="JSON endpoint URL")
|
|
p.add_argument("--id-field", default="id",
|
|
help="Field used to dedup items (default: 'id')")
|
|
p.add_argument("--items-path", default="",
|
|
help="Dotted path to the list inside the JSON response (e.g. 'data.events')")
|
|
p.add_argument("--title-field", default="title",
|
|
help="Field used as the item title in the rendered output (default: 'title')")
|
|
p.add_argument("--url-field", default="url",
|
|
help="Field used as the item URL in the rendered output (default: 'url')")
|
|
p.add_argument("--body-field", default="",
|
|
help="Optional body field to include as a snippet under each item")
|
|
p.add_argument("--max", type=int, default=20,
|
|
help="Max new items to emit per tick (default: 20)")
|
|
p.add_argument("--header", action="append", type=_parse_header, default=[],
|
|
metavar="KEY: VALUE",
|
|
help="HTTP header (repeatable)")
|
|
p.add_argument("--timeout", type=float, default=20.0,
|
|
help="HTTP timeout in seconds (default: 20)")
|
|
args = p.parse_args()
|
|
|
|
req = urllib.request.Request(args.url, headers={"User-Agent": "Hermes-Watcher/1.0"})
|
|
for k, v in args.header:
|
|
req.add_header(k, v)
|
|
|
|
try:
|
|
with urllib.request.urlopen(req, timeout=args.timeout) as resp:
|
|
raw = resp.read()
|
|
except urllib.error.HTTPError as e:
|
|
print(f"watch_http_json: HTTP {e.code} from {args.url}", file=sys.stderr)
|
|
return 2
|
|
except (urllib.error.URLError, TimeoutError, OSError) as e:
|
|
print(f"watch_http_json: network error: {e}", file=sys.stderr)
|
|
return 2
|
|
|
|
try:
|
|
data = json.loads(raw.decode("utf-8"))
|
|
except (UnicodeDecodeError, json.JSONDecodeError) as e:
|
|
print(f"watch_http_json: response is not valid JSON: {e}", file=sys.stderr)
|
|
return 2
|
|
|
|
items = _dig(data, args.items_path) if args.items_path else data
|
|
if not isinstance(items, list):
|
|
print(
|
|
f"watch_http_json: items_path={args.items_path!r} did not resolve to a list "
|
|
f"(got {type(items).__name__})",
|
|
file=sys.stderr,
|
|
)
|
|
return 2
|
|
|
|
# Keep only dicts — skip any bare strings / numbers so filter_new doesn't crash.
|
|
items = [i for i in items if isinstance(i, dict)]
|
|
|
|
wm = Watermark.load(args.name)
|
|
new_items = wm.filter_new(items, id_key=args.id_field)
|
|
wm.save()
|
|
|
|
if args.max > 0:
|
|
new_items = new_items[: args.max]
|
|
|
|
body_key = args.body_field or None
|
|
output = format_items_as_markdown(
|
|
new_items,
|
|
title_key=args.title_field,
|
|
url_key=args.url_field,
|
|
body_key=body_key,
|
|
)
|
|
if output:
|
|
sys.stdout.write(output)
|
|
return 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|