mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-13 03:52:00 +00:00
feat(skills): watchers skill — poll RSS / HTTP JSON / GitHub via cron no-agent (#21881)
* feat(skills): watchers skill — poll RSS / HTTP JSON / GitHub via cron no-agent Ships three reusable polling scripts plus a shared watermark helper as an optional skill. Users wire them into the existing cron (no_agent=True) mode rather than learning a new subsystem. Supersedes the closed PR #21497 (parallel watcher subsystem). Same value, zero new core surface. ## What ships - optional-skills/devops/watchers/SKILL.md: pattern + three example cron commands - optional-skills/devops/watchers/scripts/_watermark.py: shared helper (atomic state writes, bounded ID set, first-run baseline) - optional-skills/devops/watchers/scripts/watch_rss.py: RSS 2.0 + Atom - optional-skills/devops/watchers/scripts/watch_http_json.py: any JSON endpoint with configurable id_field / items_path / headers - optional-skills/devops/watchers/scripts/watch_github.py: issues / pulls / releases / commits (uses GITHUB_TOKEN if present) ## Invariants enforced by the shared helper - First run records baseline, emits nothing (never replays existing feed) - Watermark file is <state_dir>/<name>.json, atomic replace on write - Bounded to 500 IDs (configurable) - Empty stdout when no new items — cron treats that as silent delivery ## Validation - watch_rss.py against news.ycombinator.com/rss first run → empty stdout, watermark populated - Removed one seen-id, second run → emitted exactly that item - No DeprecationWarnings (ET element truth-value footgun dodged explicitly) End-user pattern: 'hermes cron create my-feed --schedule "*/15 * * * *" --no-agent --script $HERMES_HOME/skills/devops/watchers/scripts/watch_rss.py --script-args "--name hn --url https://news.ycombinator.com/rss" --deliver telegram' * docs(skills/watchers): tighten description to match peer optional skills * docs(skills/watchers): align frontmatter + structure with peer optional skills * docs(skills/watchers): gate to linux/macos (shell syntax in examples)
This commit is contained in:
parent
839cdd1b05
commit
ea8e608821
5 changed files with 680 additions and 0 deletions
121
optional-skills/devops/watchers/scripts/watch_rss.py
Executable file
121
optional-skills/devops/watchers/scripts/watch_rss.py
Executable file
|
|
@ -0,0 +1,121 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Watch an RSS 2.0 or Atom feed; print new items to stdout, silent on empty.
|
||||
|
||||
Usage (via cron with --no-agent):
|
||||
|
||||
hermes cron create my-feed \\
|
||||
--schedule "*/15 * * * *" --no-agent \\
|
||||
--script "$HERMES_HOME/skills/devops/watchers/scripts/watch_rss.py" \\
|
||||
--script-args "--name hn --url https://news.ycombinator.com/rss"
|
||||
|
||||
First run records a baseline (emits nothing). Subsequent runs emit only
|
||||
items whose <guid> / <id> isn't in the watermark.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from _watermark import Watermark, format_items_as_markdown # type: ignore
|
||||
|
||||
|
||||
def _strip_ns(tag: str) -> str:
|
||||
return tag.split("}", 1)[1] if "}" in tag else tag
|
||||
|
||||
|
||||
def _parse_feed(xml_bytes: bytes):
    """Return a list of ``{id, title, url, summary}`` dicts in document order.

    Handles both RSS 2.0 ``<item>`` and Atom ``<entry>`` elements; namespace
    qualifiers are ignored when matching tag names.

    The entry ID is the text of ``<guid>`` (or ``<id>`` when there is no
    ``<guid>``), falling back to the link URL. Entries with neither an ID nor
    a link are skipped.

    An entry may carry several ``<link>`` children (Atom commonly ships
    ``alternate``, ``self``, ``replies`` and ``enclosure`` links side by
    side). The ``alternate`` link is preferred; see ``_pick_link``.

    Invalid XML is reported on stderr and terminates the process with exit
    status 2.
    """
    try:
        root = ET.fromstring(xml_bytes)
    except ET.ParseError as e:
        print(f"watch_rss: invalid XML: {e}", file=sys.stderr)
        sys.exit(2)

    entries = []
    for item in root.iter():
        if _strip_ns(item.tag) not in ("item", "entry"):
            continue

        # ElementTree Elements without children are *falsy* — every lookup
        # below therefore tests `is not None`, never truthiness.
        children = {}
        links = []
        for child in item:
            name = _strip_ns(child.tag)
            children[name] = child
            if name == "link":
                # Keep every <link>; collapsing them into the dict would
                # silently retain only the last one.
                links.append(child)

        href = _pick_link(links)
        guid = _first_text(children, "guid", "id") or href
        if not guid:
            continue

        entries.append(
            {
                "id": guid,
                "title": _first_text(children, "title"),
                "url": href,
                "summary": _first_text(children, "description", "summary"),
            }
        )
    return entries


def _first_text(children, *names) -> str:
    """Return the stripped text of the first element in *names* that exists.

    An element that is present but empty yields ``""`` and stops the search;
    later names are only consulted when earlier ones are absent altogether.
    """
    for name in names:
        element = children.get(name)
        if element is not None:
            return (element.text or "").strip()
    return ""


def _pick_link(links) -> str:
    """Choose the URL to report from an entry's ``<link>`` elements.

    Each link's target is its ``href`` attribute (Atom) or, failing that, its
    stripped text (RSS). The first link whose ``rel`` is ``alternate`` — a
    missing ``rel`` counts as ``alternate`` — wins. If no such link has a
    target, the first link with any target is used. Returns ``""`` when
    nothing usable is found.
    """
    fallback = ""
    for link in links:
        target = link.attrib.get("href") or (link.text or "").strip()
        if not target:
            continue
        if link.attrib.get("rel", "alternate") == "alternate":
            return target
        fallback = fallback or target
    return fallback
|
||||
|
||||
|
||||
def main() -> int:
    """Fetch the feed, print items not seen before, and return an exit status.

    Returns 0 on success — including the "nothing new" case, which leaves
    stdout empty — and 2 when the URL is malformed or the feed cannot be
    fetched. Diagnostics go to stderr so stdout carries only feed items.
    """
    p = argparse.ArgumentParser(description="Watch an RSS/Atom feed.")
    p.add_argument("--name", required=True, help="Watcher name (used for state file)")
    p.add_argument("--url", required=True, help="Feed URL")
    p.add_argument("--max", type=int, default=10,
                   help="Max new items to emit per tick (default: 10)")
    p.add_argument("--with-summary", action="store_true",
                   help="Include <description>/<summary> snippet under each item")
    p.add_argument("--timeout", type=float, default=20.0,
                   help="HTTP timeout in seconds (default: 20)")
    args = p.parse_args()

    try:
        req = urllib.request.Request(args.url, headers={"User-Agent": "Hermes-Watcher/1.0"})
        with urllib.request.urlopen(req, timeout=args.timeout) as resp:
            xml_bytes = resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError is a URLError subclass, so it must be handled first to
        # report the status code.
        print(f"watch_rss: HTTP {e.code} from {args.url}", file=sys.stderr)
        return 2
    except (urllib.error.URLError, TimeoutError, OSError) as e:
        print(f"watch_rss: network error: {e}", file=sys.stderr)
        return 2
    except ValueError as e:
        # Request() rejects malformed or scheme-less URLs with ValueError;
        # report it like any other fetch failure instead of a traceback.
        print(f"watch_rss: invalid URL {args.url!r}: {e}", file=sys.stderr)
        return 2

    entries = _parse_feed(xml_bytes)

    # NOTE(review): Watermark lives in the sibling _watermark module; per the
    # module docstring the first run only records a baseline and emits nothing.
    wm = Watermark.load(args.name)
    new_items = wm.filter_new(entries, id_key="id")
    wm.save()

    # Cap emitted items (watermark still records all seen IDs so we don't
    # re-emit them next tick). A --max of 0 or less disables the cap.
    if args.max > 0:
        new_items = new_items[: args.max]

    body_key = "summary" if args.with_summary else None
    output = format_items_as_markdown(new_items, body_key=body_key)
    if output:
        sys.stdout.write(output)
    # Empty stdout on no-new — cron treats that as silent.
    return 0
|
||||
|
||||
|
||||
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue