mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(cron): track delivery failures in job status (#6042)
_deliver_result() now returns Optional[str] — None on success, error message on failure. All failure paths (unknown platform, platform disabled, config load error, send failure, unresolvable target) return descriptive error strings. mark_job_run() gains delivery_error param, tracked as last_delivery_error on the job — separate from agent execution errors. A job where the agent succeeded but delivery failed shows last_status='ok' + last_delivery_error='...'. The cronjob list tool now surfaces last_delivery_error so agents and users can see when cron outputs aren't arriving. Inspired by PR #5863 (oxngon) — reimplemented with proper wiring. Tests: 3 new mark_job_run tests + 6 new _deliver_result return tests.
This commit is contained in:
parent
598c25d43e
commit
fff237e111
6 changed files with 167 additions and 22 deletions
|
|
@ -574,12 +574,16 @@ def remove_job(job_id: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def mark_job_run(job_id: str, success: bool, error: Optional[str] = None):
|
||||
def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
|
||||
delivery_error: Optional[str] = None):
|
||||
"""
|
||||
Mark a job as having been run.
|
||||
|
||||
Updates last_run_at, last_status, increments completed count,
|
||||
computes next_run_at, and auto-deletes if repeat limit reached.
|
||||
|
||||
``delivery_error`` is tracked separately from the agent error — a job
|
||||
can succeed (agent produced output) but fail delivery (platform down).
|
||||
"""
|
||||
jobs = load_jobs()
|
||||
for i, job in enumerate(jobs):
|
||||
|
|
@ -588,6 +592,8 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None):
|
|||
job["last_run_at"] = now
|
||||
job["last_status"] = "ok" if success else "error"
|
||||
job["last_error"] = error if not success else None
|
||||
# Track delivery failures separately — cleared on successful delivery
|
||||
job["last_delivery_error"] = delivery_error
|
||||
|
||||
# Increment completed count
|
||||
if job.get("repeat"):
|
||||
|
|
|
|||
|
|
@ -196,7 +196,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
|
|||
logger.warning("Job '%s': failed to send media %s: %s", job.get("id", "?"), media_path, e)
|
||||
|
||||
|
||||
def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
|
||||
def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Optional[str]:
|
||||
"""
|
||||
Deliver job output to the configured target (origin chat, specific platform, etc.).
|
||||
|
||||
|
|
@ -204,16 +204,16 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
|
|||
use the live adapter first — this supports E2EE rooms (e.g. Matrix) where
|
||||
the standalone HTTP path cannot encrypt. Falls back to standalone send if
|
||||
the adapter path fails or is unavailable.
|
||||
|
||||
Returns None on success, or an error string on failure.
|
||||
"""
|
||||
target = _resolve_delivery_target(job)
|
||||
if not target:
|
||||
if job.get("deliver", "local") != "local":
|
||||
logger.warning(
|
||||
"Job '%s' deliver=%s but no concrete delivery target could be resolved",
|
||||
job["id"],
|
||||
job.get("deliver", "local"),
|
||||
)
|
||||
return
|
||||
msg = f"no delivery target resolved for deliver={job.get('deliver', 'local')}"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
return None # local-only jobs don't deliver — not a failure
|
||||
|
||||
platform_name = target["platform"]
|
||||
chat_id = target["chat_id"]
|
||||
|
|
@ -239,19 +239,22 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
|
|||
}
|
||||
platform = platform_map.get(platform_name.lower())
|
||||
if not platform:
|
||||
logger.warning("Job '%s': unknown platform '%s' for delivery", job["id"], platform_name)
|
||||
return
|
||||
msg = f"unknown platform '{platform_name}'"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
|
||||
try:
|
||||
config = load_gateway_config()
|
||||
except Exception as e:
|
||||
logger.error("Job '%s': failed to load gateway config for delivery: %s", job["id"], e)
|
||||
return
|
||||
msg = f"failed to load gateway config: {e}"
|
||||
logger.error("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
|
||||
pconfig = config.platforms.get(platform)
|
||||
if not pconfig or not pconfig.enabled:
|
||||
logger.warning("Job '%s': platform '%s' not configured/enabled", job["id"], platform_name)
|
||||
return
|
||||
msg = f"platform '{platform_name}' not configured/enabled"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
|
||||
# Optionally wrap the content with a header/footer so the user knows this
|
||||
# is a cron delivery. Wrapping is on by default; set cron.wrap_response: false
|
||||
|
|
@ -307,7 +310,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
|
|||
|
||||
if adapter_ok:
|
||||
logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
|
||||
return
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone",
|
||||
|
|
@ -329,13 +332,17 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
|
|||
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
|
||||
result = future.result(timeout=30)
|
||||
except Exception as e:
|
||||
logger.error("Job '%s': delivery to %s:%s failed: %s", job["id"], platform_name, chat_id, e)
|
||||
return
|
||||
msg = f"delivery to {platform_name}:{chat_id} failed: {e}"
|
||||
logger.error("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
|
||||
if result and result.get("error"):
|
||||
logger.error("Job '%s': delivery error: %s", job["id"], result["error"])
|
||||
else:
|
||||
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
|
||||
msg = f"delivery error: {result['error']}"
|
||||
logger.error("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
|
||||
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
|
||||
return None
|
||||
|
||||
|
||||
_SCRIPT_TIMEOUT = 120 # seconds
|
||||
|
|
@ -868,13 +875,15 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
|||
logger.info("Job '%s': agent returned %s — skipping delivery", job["id"], SILENT_MARKER)
|
||||
should_deliver = False
|
||||
|
||||
delivery_error = None
|
||||
if should_deliver:
|
||||
try:
|
||||
_deliver_result(job, deliver_content, adapters=adapters, loop=loop)
|
||||
delivery_error = _deliver_result(job, deliver_content, adapters=adapters, loop=loop)
|
||||
except Exception as de:
|
||||
delivery_error = str(de)
|
||||
logger.error("Delivery failed for job %s: %s", job["id"], de)
|
||||
|
||||
mark_job_run(job["id"], success, error)
|
||||
mark_job_run(job["id"], success, error, delivery_error=delivery_error)
|
||||
executed += 1
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue