mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-11 08:42:11 +00:00
Clear NeMo Relay plugin-config observability only after the last active Hermes session finalizes. Use the plugin's async-safe awaitable helper for both initialize and clear so session rotation remains safe under active event loops. Disable the direct ATIF fallback when plugins.toml already owns the ATIF exporter lifecycle to avoid duplicate trajectory export on finalization.
867 lines
30 KiB
Python
867 lines
30 KiB
Python
"""nemo_relay — optional Hermes plugin for NeMo Relay observability."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import inspect
|
|
import json
|
|
import logging
|
|
import os
|
|
import threading
|
|
import tomllib
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any, Optional
|
|
|
|
# Module-level logger used for all diagnostics emitted by this plugin.
logger = logging.getLogger(__name__)

# Sentinel stored in _RUNTIME after a failed import/initialization so that
# _get_runtime() returns None without retrying.
_INIT_FAILED = object()
# Re-entrant lock held while _RUNTIME is read or replaced.
_LOCK = threading.RLock()
# Lazily populated singleton: None until first use, afterwards either a
# _Runtime instance or the _INIT_FAILED sentinel.
_RUNTIME: "_Runtime | object | None" = None
|
|
|
|
|
|
@dataclass
|
|
class _SessionState:
|
|
session_id: str
|
|
handle: Any = None
|
|
atif_exporter: Any = None
|
|
atif_subscriber_name: str = ""
|
|
is_embedded_subagent: bool = False
|
|
parent_session_id: str = ""
|
|
llm_spans: dict[str, Any] = field(default_factory=dict)
|
|
tool_spans: dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
@dataclass
|
|
class _SubagentParent:
|
|
parent_session_id: str
|
|
parent_handle: Any
|
|
metadata: dict[str, Any]
|
|
|
|
|
|
@dataclass
|
|
class _Settings:
|
|
plugins_toml_path: str = ""
|
|
plugins_config: dict[str, Any] | None = None
|
|
adaptive_enabled: bool = False
|
|
adaptive_mode: str = "observe"
|
|
atof_enabled: bool = False
|
|
atof_output_directory: str = ""
|
|
atof_filename: str = "hermes-atof.jsonl"
|
|
atof_mode: str = "append"
|
|
atif_enabled: bool = False
|
|
atif_output_directory: str = ""
|
|
atif_filename_template: str = "hermes-atif-{session_id}.json"
|
|
atif_subagent_export_mode: str = "embedded"
|
|
atif_agent_name: str = "Hermes Agent"
|
|
atif_agent_version: str = "unknown"
|
|
atif_model_name: str = "unknown"
|
|
|
|
|
|
class _Runtime:
    """Glue between Hermes hook callbacks and the imported ``nemo_relay`` module.

    Keeps one ``_SessionState`` per session id, remembers which child session
    ids were announced as sub-agents, and owns the exporter and plugins.toml
    lifecycle.
    """

    def __init__(self, nemo_relay: Any, settings: _Settings) -> None:
        """Store collaborators and configure exporters.

        plugins.toml initialization is attempted first; the settings-driven
        ATOF exporter is only configured when that did not succeed.
        """
        self.nemo_relay = nemo_relay
        self.settings = settings
        self.sessions: dict[str, _SessionState] = {}
        self.subagent_parents: dict[str, _SubagentParent] = {}
        self.atof_exporter: Any = None
        self._plugin_config_initialized = self._configure_plugins_toml()
        if not self._plugin_config_initialized:
            self._configure_atof()

    def _configure_plugins_toml(self) -> bool:
        """Initialize ``nemo_relay.plugin`` from the parsed plugins.toml.

        Returns:
            True when ``plugin.initialize`` completed without raising; False
            when no config is loaded, the helper is missing, or initialization
            failed (the failure is logged at debug level, not raised).
        """
        if not self.settings.plugins_config:
            return False
        plugin_mod = getattr(self.nemo_relay, "plugin", None)
        initialize = getattr(plugin_mod, "initialize", None)
        if not callable(initialize):
            return False
        try:
            self._ensure_plugin_config_output_dirs(self.settings.plugins_config)
            # initialize() may return an awaitable; resolve it synchronously.
            _resolve_awaitable(initialize(self.settings.plugins_config))
            return True
        except Exception as exc:
            logger.debug("NeMo Relay plugins.toml init failed: %s", exc, exc_info=True)
            return False

    def _clear_plugins_toml(self) -> None:
        """Call ``nemo_relay.plugin.clear`` and mark the config as uninitialized.

        No-op when the config was never initialized or ``clear`` is not
        callable. Exceptions from ``clear`` propagate to the caller, in which
        case the initialized flag is left set.
        """
        if not self._plugin_config_initialized:
            return
        plugin_mod = getattr(self.nemo_relay, "plugin", None)
        clear = getattr(plugin_mod, "clear", None)
        if not callable(clear):
            return
        _resolve_awaitable(clear())
        self._plugin_config_initialized = False

    def _ensure_plugin_config_output_dirs(self, config: dict[str, Any]) -> None:
        """Create the output directories named by observability exporters.

        Only dict components with ``kind == "observability"`` that are not
        explicitly disabled are inspected; within them, the ``atof`` and
        ``atif`` sections' non-blank ``output_directory`` strings are created.
        """
        for component in config.get("components", []):
            if not isinstance(component, dict):
                continue
            if component.get("kind") != "observability":
                continue
            if component.get("enabled") is False:
                continue
            component_config = component.get("config")
            if not isinstance(component_config, dict):
                continue
            for exporter_name in ("atof", "atif"):
                exporter_config = component_config.get(exporter_name)
                if not isinstance(exporter_config, dict):
                    continue
                output_directory = exporter_config.get("output_directory")
                if isinstance(output_directory, str) and output_directory.strip():
                    Path(output_directory).mkdir(parents=True, exist_ok=True)

    def _configure_atof(self) -> None:
        """Create and register the settings-driven ATOF exporter, if enabled."""
        if not self.settings.atof_enabled:
            return
        config = self.nemo_relay.AtofExporterConfig()
        if self.settings.atof_output_directory:
            Path(self.settings.atof_output_directory).mkdir(parents=True, exist_ok=True)
            config.output_directory = self.settings.atof_output_directory
        config.filename = self.settings.atof_filename
        if self.settings.atof_mode.lower() == "overwrite":
            config.mode = self.nemo_relay.AtofExporterMode.Overwrite
        else:
            config.mode = self.nemo_relay.AtofExporterMode.Append
        self.atof_exporter = self.nemo_relay.AtofExporter(config)
        self.atof_exporter.register("hermes.nemo_relay.atof")

    def _deregister_atif(self, state: _SessionState) -> None:
        """Best-effort removal of the session's ATIF subscriber (never raises)."""
        if state.atif_exporter is None or not state.atif_subscriber_name:
            return
        try:
            state.atif_exporter.deregister(state.atif_subscriber_name)
        except Exception:
            logger.debug("NeMo Relay ATIF deregister failed", exc_info=True)

    def ensure_session(self, kwargs: dict[str, Any]) -> _SessionState:
        """Return the state for the session named in ``kwargs``, creating it if needed.

        Creating a session registers a per-session ATIF exporter (when direct
        ATIF export is enabled) and pushes an agent scope, parented to the
        announcing session when the id was recorded by ``mark_subagent_start``.

        Raises:
            Whatever the exporter construction/registration or ``scope.push``
            raises; in that case no state is stored.
        """
        # Re-initialize plugins.toml observability if it was cleared after the
        # previous last session closed (or failed to initialize earlier).
        if self.settings.plugins_config and not self._plugin_config_initialized:
            self._plugin_config_initialized = self._configure_plugins_toml()
        session_id = _session_id(kwargs)
        state = self.sessions.get(session_id)
        if state is not None:
            return state

        state = _SessionState(session_id=session_id)
        if self.settings.atif_enabled:
            state.atif_exporter = self.nemo_relay.AtifExporter(
                session_id,
                self.settings.atif_agent_name,
                self.settings.atif_agent_version,
                model_name=str(kwargs.get("model") or self.settings.atif_model_name),
                extra={"source": "hermes-agent", "plugin": "observability/nemo_relay"},
            )
            state.atif_subscriber_name = f"hermes.nemo_relay.atif.{session_id}"
            state.atif_exporter.register(state.atif_subscriber_name)

        subagent_parent = self.subagent_parents.get(session_id)
        metadata = _metadata(kwargs)
        parent_handle = None
        if subagent_parent is not None:
            parent_handle = subagent_parent.parent_handle
            # Parent-provided metadata wins over the child's own values.
            metadata = {**metadata, **subagent_parent.metadata}
            state.is_embedded_subagent = True
            state.parent_session_id = subagent_parent.parent_session_id

        try:
            state.handle = self.nemo_relay.scope.push(
                f"hermes-session-{session_id}",
                self.nemo_relay.ScopeType.Agent,
                handle=parent_handle,
                data={"session_id": session_id},
                metadata=metadata,
            )
        except Exception:
            # The state is not stored when the push fails, so close_session()
            # would never deregister the exporter registered above.
            self._deregister_atif(state)
            raise
        self.sessions[session_id] = state
        return state

    def export_atif(self, state: _SessionState) -> None:
        """Write the session's ATIF JSON to the configured output directory.

        Skipped when direct ATIF export is off, the state has no exporter, the
        session is an embedded sub-agent and the export mode is not ``"all"``,
        or no output directory is configured. Filesystem and template
        formatting errors propagate to the caller.
        """
        if not self.settings.atif_enabled or state.atif_exporter is None:
            return
        if state.is_embedded_subagent and self.settings.atif_subagent_export_mode != "all":
            return
        output_dir = self.settings.atif_output_directory
        if not output_dir:
            return
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        filename = self.settings.atif_filename_template.format(session_id=state.session_id)
        Path(output_dir, filename).write_text(state.atif_exporter.export_json(), encoding="utf-8")

    def close_session(self, kwargs: dict[str, Any]) -> None:
        """Tear down the session named in ``kwargs``.

        Pops the scope, exports ATIF, deregisters the ATIF subscriber and, once
        no sessions remain, clears the plugins.toml configuration. Every step
        is best-effort: a failure is logged at debug level and the remaining
        cleanup steps still run.
        """
        session_id = _session_id(kwargs)
        self.subagent_parents.pop(session_id, None)
        state = self.sessions.pop(session_id, None)
        if state is None:
            return
        if state.handle is not None:
            try:
                self.nemo_relay.scope.pop(state.handle, output=_jsonable(kwargs))
            except Exception:
                logger.debug("NeMo Relay session pop failed", exc_info=True)
        try:
            self.export_atif(state)
        except Exception:
            # A failed export must not skip the deregister/clear steps below.
            logger.debug("NeMo Relay ATIF export failed", exc_info=True)
        self._deregister_atif(state)
        # Only the last active session tears down plugins.toml observability.
        if self._plugin_config_initialized and not self.sessions:
            try:
                self._clear_plugins_toml()
            except Exception:
                logger.debug("NeMo Relay plugins.toml clear failed", exc_info=True)

    def mark(self, name: str, kwargs: dict[str, Any]) -> None:
        """Emit a scope event called ``name`` on the session named in ``kwargs``."""
        state = self.ensure_session(kwargs)
        self.nemo_relay.scope.event(
            name,
            handle=state.handle,
            data=_jsonable(kwargs),
            metadata=_metadata(kwargs),
        )

    def mark_subagent_start(self, kwargs: dict[str, Any]) -> None:
        """Record the parent linkage for a child session and emit a start event.

        The linkage is stored only when ``kwargs`` carries a child session id;
        the ``hermes.subagent.start`` event is emitted on the parent either way.
        """
        parent_state = self.ensure_session(kwargs)
        metadata = _metadata(kwargs)
        child_session_id = _child_session_id(kwargs)
        if child_session_id:
            self.subagent_parents[child_session_id] = _SubagentParent(
                parent_session_id=parent_state.session_id,
                parent_handle=parent_state.handle,
                metadata=_subagent_child_metadata(kwargs, metadata),
            )
        self.nemo_relay.scope.event(
            "hermes.subagent.start",
            handle=parent_state.handle,
            data=_jsonable(kwargs),
            metadata=metadata,
        )

    def mark_subagent_stop(self, kwargs: dict[str, Any]) -> None:
        """Forget the child's parent linkage and emit ``hermes.subagent.stop``."""
        child_session_id = _child_session_id(kwargs)
        if child_session_id:
            self.subagent_parents.pop(child_session_id, None)
        self.mark("hermes.subagent.stop", kwargs)

    def managed_llm_enabled(self) -> bool:
        """Tell whether LLM calls should be routed through ``nemo_relay.llm.execute``.

        Requires the adaptive component to be enabled and both
        ``nemo_relay.llm.execute`` and ``nemo_relay.LLMRequest`` to be callable.
        """
        return (
            self.settings.adaptive_enabled
            and callable(getattr(getattr(self.nemo_relay, "llm", None), "execute", None))
            and callable(getattr(self.nemo_relay, "LLMRequest", None))
        )

    def managed_tool_enabled(self) -> bool:
        """Tell whether tool calls should be routed through ``nemo_relay.tools.execute``."""
        return (
            self.settings.adaptive_enabled
            and callable(getattr(getattr(self.nemo_relay, "tools", None), "execute", None))
        )

    def execute_llm(self, kwargs: dict[str, Any]) -> Any:
        """Run an LLM call through ``nemo_relay.llm.execute``.

        Returns:
            The JSON-safe request body when ``kwargs["next_call"]`` is not
            callable. Otherwise the raw value returned by ``next_call`` if the
            relay invoked it, else whatever the relay execution produced.
        """
        state = self.ensure_session(kwargs)
        request_body = _jsonable(kwargs.get("request") or {})
        request = self.nemo_relay.LLMRequest({}, request_body)
        next_call = kwargs.get("next_call")
        if not callable(next_call):
            return request_body

        # Captures the untouched downstream result so the caller receives the
        # original object rather than the normalized payload given to the relay.
        raw_response: dict[str, Any] = {"set": False, "value": None}

        def _impl(next_request: Any) -> Any:
            next_body = getattr(next_request, "content", next_request)
            raw = next_call(next_body if isinstance(next_body, dict) else request_body)
            raw_response["set"] = True
            raw_response["value"] = raw
            return _llm_response_payload(raw)

        async def _managed_execute() -> Any:
            result = self.nemo_relay.llm.execute(
                str(kwargs.get("provider") or "llm"),
                request,
                _impl,
                handle=state.handle,
                data=_jsonable(
                    {
                        "turn_id": kwargs.get("turn_id"),
                        "api_request_id": kwargs.get("api_request_id"),
                        "api_call_count": kwargs.get("api_call_count"),
                        "mode": self.settings.adaptive_mode,
                    }
                ),
                metadata=_metadata(kwargs),
                model_name=str(kwargs.get("model") or ""),
            )
            if inspect.isawaitable(result):
                return await result
            return result

        managed_result = _resolve_awaitable(_managed_execute())
        return raw_response["value"] if raw_response["set"] else managed_result

    def execute_tool(self, kwargs: dict[str, Any]) -> Any:
        """Run a tool call through ``nemo_relay.tools.execute``.

        Returns:
            The JSON-safe args when ``kwargs["next_call"]`` is not callable.
            Otherwise the raw value returned by ``next_call`` if the relay
            invoked it, else whatever the relay execution produced.
        """
        state = self.ensure_session(kwargs)
        tool_name = str(kwargs.get("tool_name") or "tool")
        args = _jsonable(kwargs.get("args") or {})
        next_call = kwargs.get("next_call")
        if not callable(next_call):
            return args

        # Same capture trick as execute_llm: hand back the raw tool result.
        raw_response: dict[str, Any] = {"set": False, "value": None}

        def _impl(next_args: Any) -> Any:
            effective_args = next_args if isinstance(next_args, dict) else args
            raw = next_call(effective_args)
            raw_response["set"] = True
            raw_response["value"] = raw
            return _jsonable(raw)

        async def _managed_execute() -> Any:
            result = self.nemo_relay.tools.execute(
                tool_name,
                args,
                _impl,
                handle=state.handle,
                data=_jsonable(
                    {
                        "turn_id": kwargs.get("turn_id"),
                        "api_request_id": kwargs.get("api_request_id"),
                        "tool_call_id": kwargs.get("tool_call_id"),
                        "mode": self.settings.adaptive_mode,
                    }
                ),
                metadata=_metadata(kwargs),
            )
            if inspect.isawaitable(result):
                return await result
            return result

        managed_result = _resolve_awaitable(_managed_execute())
        return raw_response["value"] if raw_response["set"] else managed_result
|
|
|
|
|
|
def register(ctx) -> None:
    """Attach every hook and middleware handler of this plugin to ``ctx``.

    Hooks are registered first, in the order listed, followed by the two
    execution middlewares.
    """
    hook_table = (
        ("on_session_start", on_session_start),
        ("on_session_end", on_session_end),
        ("on_session_finalize", on_session_finalize),
        ("on_session_reset", on_session_reset),
        ("pre_llm_call", on_pre_llm_call),
        ("post_llm_call", on_post_llm_call),
        ("pre_api_request", on_pre_api_request),
        ("post_api_request", on_post_api_request),
        ("api_request_error", on_api_request_error),
        ("pre_tool_call", on_pre_tool_call),
        ("post_tool_call", on_post_tool_call),
        ("pre_approval_request", on_pre_approval_request),
        ("post_approval_response", on_post_approval_response),
        ("subagent_start", on_subagent_start),
        ("subagent_stop", on_subagent_stop),
    )
    middleware_table = (
        ("llm_execution", on_llm_execution_middleware),
        ("tool_execution", on_tool_execution_middleware),
    )
    for hook_name, handler in hook_table:
        ctx.register_hook(hook_name, handler)
    for middleware_name, handler in middleware_table:
        ctx.register_middleware(middleware_name, handler)
|
|
|
|
|
|
def on_session_start(**kwargs: Any) -> None:
    """Hook: make sure a relay session exists for the session in ``kwargs``."""
    runtime = _get_runtime()
    if runtime is None:
        return

    def _open() -> None:
        runtime.ensure_session(kwargs)

    _safe(_open)
|
|
|
|
|
|
def on_session_end(**kwargs: Any) -> None:
    """Hook: emit ``hermes.session.end`` and export the session's ATIF file.

    The session stays open; both steps run under ``_safe``, so a failure in
    the event skips the export and is only logged.
    """
    runtime = _get_runtime()
    if runtime is None:
        return

    def _finish() -> None:
        runtime.mark("hermes.session.end", kwargs)
        runtime.export_atif(runtime.ensure_session(kwargs))

    _safe(_finish)
|
|
|
|
|
|
def on_session_finalize(**kwargs: Any) -> None:
    """Hook: close the relay session named in ``kwargs``."""
    runtime = _get_runtime()
    if runtime is None:
        return

    def _close() -> None:
        runtime.close_session(kwargs)

    _safe(_close)
|
|
|
|
|
|
def on_session_reset(**kwargs: Any) -> None:
    """Hook: close the relay session named in ``kwargs`` when Hermes resets it."""
    runtime = _get_runtime()
    if runtime is None:
        return

    def _close() -> None:
        runtime.close_session(kwargs)

    _safe(_close)
|
|
|
|
|
|
def on_pre_llm_call(**kwargs: Any) -> None:
    """Hook: emit the ``hermes.turn.start`` event."""
    runtime = _get_runtime()
    if runtime is None:
        return

    def _emit() -> None:
        runtime.mark("hermes.turn.start", kwargs)

    _safe(_emit)
|
|
|
|
|
|
def on_post_llm_call(**kwargs: Any) -> None:
    """Hook: emit the ``hermes.turn.end`` event."""
    runtime = _get_runtime()
    if runtime is None:
        return

    def _emit() -> None:
        runtime.mark("hermes.turn.end", kwargs)

    _safe(_emit)
|
|
|
|
|
|
def on_pre_api_request(**kwargs: Any) -> None:
    """Hook: open an LLM span before a provider API request.

    Does nothing when the runtime is unavailable or managed LLM execution is
    enabled. The span is stored on the session under ``_api_key(kwargs)`` so
    the post/error hooks can end it.
    """
    runtime = _get_runtime()
    if runtime is None or runtime.managed_llm_enabled():
        return

    def _open_span() -> None:
        session = runtime.ensure_session(kwargs)
        payload = kwargs.get("request")
        # Only a dict payload contributes a body; anything else records {}.
        body = payload.get("body") if isinstance(payload, dict) else {}
        relay_request = runtime.nemo_relay.LLMRequest({}, _jsonable(body))
        correlation = {
            "turn_id": kwargs.get("turn_id"),
            "api_request_id": kwargs.get("api_request_id"),
        }
        session.llm_spans[_api_key(kwargs)] = runtime.nemo_relay.llm.call(
            str(kwargs.get("provider") or "llm"),
            relay_request,
            handle=session.handle,
            data=_jsonable(correlation),
            metadata=_metadata(kwargs),
            model_name=str(kwargs.get("model") or ""),
        )

    _safe(_open_span)
|
|
|
|
|
|
def on_post_api_request(**kwargs: Any) -> None:
    """Hook: end the LLM span opened for this API request.

    Does nothing when the runtime is unavailable or managed LLM execution is
    enabled. When no span is stored under ``_api_key(kwargs)``, a
    ``hermes.api.response.unmatched`` event is emitted instead.
    """
    runtime = _get_runtime()
    if runtime is None or runtime.managed_llm_enabled():
        return

    def _close_span() -> None:
        session = runtime.ensure_session(kwargs)
        open_span = session.llm_spans.pop(_api_key(kwargs), None)
        if open_span is None:
            runtime.mark("hermes.api.response.unmatched", kwargs)
            return
        summary = {
            "usage": kwargs.get("usage"),
            "finish_reason": kwargs.get("finish_reason"),
        }
        runtime.nemo_relay.llm.call_end(
            open_span,
            _jsonable(kwargs.get("response") or {}),
            data=_jsonable(summary),
            metadata=_metadata(kwargs),
        )

    _safe(_close_span)
|
|
|
|
|
|
def on_api_request_error(**kwargs: Any) -> None:
    """Hook: end the LLM span for a failed API request with an error payload.

    Does nothing when the runtime is unavailable or managed LLM execution is
    enabled. When no span is stored under ``_api_key(kwargs)``, a
    ``hermes.api.error`` event is emitted instead.
    """
    runtime = _get_runtime()
    if runtime is None or runtime.managed_llm_enabled():
        return

    def _fail_span() -> None:
        session = runtime.ensure_session(kwargs)
        open_span = session.llm_spans.pop(_api_key(kwargs), None)
        if open_span is None:
            runtime.mark("hermes.api.error", kwargs)
            return
        error_payload = {"error": _jsonable(kwargs.get("error") or {})}
        runtime.nemo_relay.llm.call_end(
            open_span,
            error_payload,
            data=_jsonable(kwargs),
            metadata=_metadata(kwargs),
        )

    _safe(_fail_span)
|
|
|
|
|
|
def on_pre_tool_call(**kwargs: Any) -> None:
    """Hook: open a tool span before a tool runs.

    Does nothing when the runtime is unavailable or managed tool execution is
    enabled. The span is stored on the session under ``_tool_key(kwargs)``.
    """
    runtime = _get_runtime()
    if runtime is None or runtime.managed_tool_enabled():
        return

    def _open_span() -> None:
        session = runtime.ensure_session(kwargs)
        correlation = {
            "turn_id": kwargs.get("turn_id"),
            "api_request_id": kwargs.get("api_request_id"),
        }
        session.tool_spans[_tool_key(kwargs)] = runtime.nemo_relay.tools.call(
            str(kwargs.get("tool_name") or "tool"),
            _jsonable(kwargs.get("args") or {}),
            handle=session.handle,
            data=_jsonable(correlation),
            metadata=_metadata(kwargs),
            tool_call_id=str(kwargs.get("tool_call_id") or ""),
        )

    _safe(_open_span)
|
|
|
|
|
|
def on_post_tool_call(**kwargs: Any) -> None:
    """Hook: end the tool span opened for this tool call.

    Does nothing when the runtime is unavailable or managed tool execution is
    enabled. When no span is stored under ``_tool_key(kwargs)``, a
    ``hermes.tool.response.unmatched`` event is emitted instead.
    """
    runtime = _get_runtime()
    if runtime is None or runtime.managed_tool_enabled():
        return

    def _close_span() -> None:
        session = runtime.ensure_session(kwargs)
        open_span = session.tool_spans.pop(_tool_key(kwargs), None)
        if open_span is None:
            runtime.mark("hermes.tool.response.unmatched", kwargs)
            return
        summary = {
            "status": kwargs.get("status"),
            "duration_ms": kwargs.get("duration_ms"),
        }
        runtime.nemo_relay.tools.call_end(
            open_span,
            _jsonable(kwargs.get("result")),
            data=_jsonable(summary),
            metadata=_metadata(kwargs),
        )

    _safe(_close_span)
|
|
|
|
|
|
def on_pre_approval_request(**kwargs: Any) -> None:
    """Hook: emit the ``hermes.approval.request`` event."""
    runtime = _get_runtime()
    if runtime is None:
        return

    def _emit() -> None:
        runtime.mark("hermes.approval.request", kwargs)

    _safe(_emit)
|
|
|
|
|
|
def on_post_approval_response(**kwargs: Any) -> None:
    """Hook: emit the ``hermes.approval.response`` event."""
    runtime = _get_runtime()
    if runtime is None:
        return

    def _emit() -> None:
        runtime.mark("hermes.approval.response", kwargs)

    _safe(_emit)
|
|
|
|
|
|
def on_subagent_start(**kwargs: Any) -> None:
    """Hook: record the sub-agent's parent linkage and emit its start event."""
    runtime = _get_runtime()
    if runtime is None:
        return

    def _start() -> None:
        runtime.mark_subagent_start(kwargs)

    _safe(_start)
|
|
|
|
|
|
def on_subagent_stop(**kwargs: Any) -> None:
    """Hook: drop the sub-agent's parent linkage and emit its stop event."""
    runtime = _get_runtime()
    if runtime is None:
        return

    def _stop() -> None:
        runtime.mark_subagent_stop(kwargs)

    _safe(_stop)
|
|
|
|
|
|
def on_llm_execution_middleware(**kwargs: Any) -> Any:
    """Middleware: route the LLM call through the relay when managed mode is on.

    Otherwise the request is passed straight to ``next_call``; when that is
    not callable, the request itself (or ``{}``) is returned.
    """
    runtime = _get_runtime()
    if runtime is not None and runtime.managed_llm_enabled():
        return runtime.execute_llm(kwargs)

    downstream = kwargs.get("next_call")
    payload = kwargs.get("request") or {}
    return downstream(payload) if callable(downstream) else payload
|
|
|
|
|
|
def on_tool_execution_middleware(**kwargs: Any) -> Any:
    """Middleware: route the tool call through the relay when managed mode is on.

    Otherwise the args are passed straight to ``next_call``; when that is not
    callable, the args themselves (or ``{}``) are returned.
    """
    runtime = _get_runtime()
    if runtime is not None and runtime.managed_tool_enabled():
        return runtime.execute_tool(kwargs)

    downstream = kwargs.get("next_call")
    tool_args = kwargs.get("args") or {}
    return downstream(tool_args) if callable(downstream) else tool_args
|
|
|
|
|
|
def _get_runtime() -> Optional[_Runtime]:
    """Return the shared ``_Runtime``, creating it on first use.

    Returns None when ``nemo_relay`` cannot be imported or the runtime cannot
    be constructed. A failure is remembered via ``_INIT_FAILED`` so later
    calls return None immediately instead of retrying.
    """
    global _RUNTIME
    with _LOCK:
        if isinstance(_RUNTIME, _Runtime):
            return _RUNTIME
        if _RUNTIME is _INIT_FAILED:
            return None

        # Imported lazily so the plugin stays optional.
        try:
            import nemo_relay as nemo_runtime
        except Exception as exc:
            logger.debug("NeMo Relay plugin disabled: import failed: %s", exc)
            _RUNTIME = _INIT_FAILED
            return None

        try:
            created = _Runtime(nemo_relay=nemo_runtime, settings=_load_settings())
        except Exception as exc:
            logger.debug("NeMo Relay plugin disabled: init failed: %s", exc, exc_info=True)
            _RUNTIME = _INIT_FAILED
            return None

        _RUNTIME = created
        return created
|
|
|
|
|
|
def _load_settings() -> _Settings:
    """Build ``_Settings`` from ``HERMES_NEMO_RELAY_*`` variables and plugins.toml.

    The direct ATIF exporter is switched off when plugins.toml already enables
    an observability ``atif`` exporter.
    """

    def _env_or(name: str, fallback: str) -> str:
        return _env(name) or fallback

    toml_path = _env("HERMES_NEMO_RELAY_PLUGINS_TOML")
    toml_config = _load_plugins_config(toml_path)
    adaptive = _enabled_component_config(toml_config, "adaptive")

    direct_atif = _env_bool("HERMES_NEMO_RELAY_ATIF_ENABLED")
    if direct_atif and _observability_exporter_enabled(toml_config, "atif"):
        logger.debug(
            "NeMo Relay direct ATIF fallback disabled because plugins.toml observability.atif owns exporter lifecycle"
        )
        direct_atif = False

    return _Settings(
        plugins_toml_path=toml_path,
        plugins_config=toml_config,
        adaptive_enabled=adaptive is not None,
        adaptive_mode=_adaptive_mode(adaptive),
        atof_enabled=_env_bool("HERMES_NEMO_RELAY_ATOF_ENABLED"),
        atof_output_directory=_env("HERMES_NEMO_RELAY_ATOF_OUTPUT_DIRECTORY"),
        atof_filename=_env_or("HERMES_NEMO_RELAY_ATOF_FILENAME", "hermes-atof.jsonl"),
        atof_mode=_env_or("HERMES_NEMO_RELAY_ATOF_MODE", "append"),
        atif_enabled=direct_atif,
        atif_output_directory=_env("HERMES_NEMO_RELAY_ATIF_OUTPUT_DIRECTORY"),
        atif_filename_template=_env_or(
            "HERMES_NEMO_RELAY_ATIF_FILENAME_TEMPLATE", "hermes-atif-{session_id}.json"
        ),
        atif_subagent_export_mode=_atif_subagent_export_mode(),
        atif_agent_name=_env_or("HERMES_NEMO_RELAY_ATIF_AGENT_NAME", "Hermes Agent"),
        atif_agent_version=_env_or("HERMES_NEMO_RELAY_ATIF_AGENT_VERSION", "unknown"),
        atif_model_name=_env_or("HERMES_NEMO_RELAY_ATIF_MODEL_NAME", "unknown"),
    )
|
|
|
|
|
|
def _load_plugins_config(path: str) -> dict[str, Any] | None:
|
|
if not path:
|
|
return None
|
|
try:
|
|
return tomllib.loads(Path(path).read_text(encoding="utf-8"))
|
|
except Exception as exc:
|
|
logger.debug("NeMo Relay plugins.toml load failed: %s", exc, exc_info=True)
|
|
return None
|
|
|
|
|
|
def _enabled_component_config(
|
|
plugins_config: dict[str, Any] | None,
|
|
kind: str,
|
|
) -> dict[str, Any] | None:
|
|
if not isinstance(plugins_config, dict):
|
|
return None
|
|
components = plugins_config.get("components")
|
|
if not isinstance(components, list):
|
|
return None
|
|
for component in components:
|
|
if not isinstance(component, dict):
|
|
continue
|
|
if component.get("kind") != kind or not component.get("enabled", True):
|
|
continue
|
|
config = component.get("config")
|
|
return config if isinstance(config, dict) else {}
|
|
return None
|
|
|
|
|
|
def _adaptive_mode(config: dict[str, Any] | None) -> str:
|
|
if not isinstance(config, dict):
|
|
return "observe"
|
|
mode = config.get("mode")
|
|
if isinstance(mode, str) and mode.strip():
|
|
return mode.strip()
|
|
return "observe"
|
|
|
|
|
|
def _observability_exporter_enabled(
    plugins_config: dict[str, Any] | None,
    exporter_name: str,
) -> bool:
    """Tell whether plugins.toml enables the named observability exporter.

    The exporter counts as enabled when the enabled observability component
    has a dict section for it whose ``enabled`` value is not exactly False.
    """
    observability = _enabled_component_config(plugins_config, "observability")
    section = observability.get(exporter_name) if isinstance(observability, dict) else None
    if not isinstance(section, dict):
        return False
    return section.get("enabled", True) is not False
|
|
|
|
|
|
def _env(name: str) -> str:
|
|
return os.environ.get(name, "").strip()
|
|
|
|
|
|
def _atif_subagent_export_mode() -> str:
    """Read the sub-agent ATIF export mode: ``"all"`` or (by default) ``"embedded"``."""
    requested = _env("HERMES_NEMO_RELAY_ATIF_SUBAGENT_EXPORT_MODE").lower()
    if requested == "all":
        return "all"
    return "embedded"
|
|
|
|
|
|
def _env_bool(name: str) -> bool:
    """Interpret environment variable ``name`` as a flag.

    True only for ``1``, ``true``, ``yes`` or ``on`` (case-insensitive).
    """
    truthy_words = ("1", "true", "yes", "on")
    return _env(name).lower() in truthy_words
|
|
|
|
|
|
def _session_id(kwargs: dict[str, Any]) -> str:
|
|
return str(kwargs.get("session_id") or kwargs.get("parent_session_id") or "default")
|
|
|
|
|
|
def _child_session_id(kwargs: dict[str, Any]) -> str:
|
|
return str(kwargs.get("child_session_id") or "")
|
|
|
|
|
|
def _subagent_child_metadata(kwargs: dict[str, Any], parent_metadata: dict[str, Any]) -> dict[str, Any]:
    """Build the metadata applied to a sub-agent's session scope.

    The child session id becomes both ``session_id`` and ``trajectory_id``;
    the remaining keys are copied from ``parent_metadata`` when present and
    not None.
    """
    child_id = _child_session_id(kwargs)
    child_metadata: dict[str, Any] = {
        "session_id": child_id,
        "trajectory_id": child_id,
        "nemo_relay_scope_role": "subagent",
    }
    # target key in the child metadata -> key read from the parent metadata
    copied_keys = {
        "subagent_id": "child_subagent_id",
        "child_session_id": "child_session_id",
        "child_subagent_id": "child_subagent_id",
        "child_role": "child_role",
        "parent_session_id": "parent_session_id",
        "parent_turn_id": "parent_turn_id",
        "parent_subagent_id": "parent_subagent_id",
        "parent_trajectory_id": "parent_trajectory_id",
        "telemetry_schema_version": "telemetry_schema_version",
    }
    for target_key, source_key in copied_keys.items():
        inherited = parent_metadata.get(source_key)
        if inherited is not None:
            child_metadata[target_key] = inherited
    return child_metadata
|
|
|
|
|
|
def _api_key(kwargs: dict[str, Any]) -> str:
    """Return the key used to pair an API request span with its response.

    Prefers ``api_request_id``; otherwise combines the session id with
    ``api_call_count`` (or the literal ``api``).
    """
    request_id = kwargs.get("api_request_id")
    if request_id:
        return str(request_id)
    call_marker = kwargs.get("api_call_count") or "api"
    return f"{_session_id(kwargs)}:{call_marker}"
|
|
|
|
|
|
def _tool_key(kwargs: dict[str, Any]) -> str:
    """Return the key used to pair a tool span with its result.

    Prefers ``tool_call_id``; otherwise combines the session id, the turn id
    (or "") and the tool name (or the literal ``tool``).
    """
    call_id = kwargs.get("tool_call_id")
    if call_id:
        return str(call_id)
    turn = kwargs.get("turn_id") or ""
    tool = kwargs.get("tool_name") or "tool"
    return f"{_session_id(kwargs)}:{turn}:{tool}"
|
|
|
|
|
|
def _metadata(kwargs: dict[str, Any]) -> dict[str, Any]:
    """Extract the known correlation fields from hook ``kwargs``.

    Fields that are missing or None are skipped and values are made JSON-safe.
    For each of the session / parent session / child session ids present, the
    matching ``*trajectory_id`` key is added with the same value.
    """
    known_keys = (
        "telemetry_schema_version",
        "session_id",
        "platform",
        "task_id",
        "turn_id",
        "api_request_id",
        "tool_call_id",
        "parent_session_id",
        "parent_turn_id",
        "parent_subagent_id",
        "child_session_id",
        "child_subagent_id",
        "child_role",
        "child_status",
        "provider",
        "model",
        "api_mode",
        "status",
        "reason",
    )
    collected: dict[str, Any] = {}
    for key in known_keys:
        raw = kwargs.get(key)
        if raw is not None:
            collected[key] = _jsonable(raw)

    trajectory_aliases = (
        ("session_id", "trajectory_id"),
        ("parent_session_id", "parent_trajectory_id"),
        ("child_session_id", "child_trajectory_id"),
    )
    for id_key, trajectory_key in trajectory_aliases:
        if id_key in collected:
            collected.setdefault(trajectory_key, collected[id_key])
    return collected
|
|
|
|
|
|
def _jsonable(value: Any) -> Any:
|
|
if value is None or isinstance(value, (str, int, float, bool)):
|
|
return value
|
|
if isinstance(value, dict):
|
|
return {str(k): _jsonable(v) for k, v in value.items()}
|
|
if isinstance(value, (list, tuple, set)):
|
|
return [_jsonable(v) for v in value]
|
|
try:
|
|
if hasattr(value, "model_dump"):
|
|
return _jsonable(value.model_dump(mode="json"))
|
|
except Exception:
|
|
pass
|
|
try:
|
|
if hasattr(value, "__dict__"):
|
|
return _jsonable(vars(value))
|
|
except Exception:
|
|
pass
|
|
try:
|
|
return json.loads(json.dumps(value, default=str))
|
|
except Exception:
|
|
return str(value)
|
|
|
|
|
|
def _value(obj: Any, key: str, default: Any = None) -> Any:
|
|
if isinstance(obj, dict):
|
|
return obj.get(key, default)
|
|
return getattr(obj, key, default)
|
|
|
|
|
|
def _llm_response_payload(response: Any) -> Any:
    """Return the LLM response shape NeMo Relay's ATIF conversion expects.

    A response whose JSON-safe form already contains ``assistant_message`` is
    returned in that form. Otherwise the first entry of ``choices`` (when
    ``choices`` is a non-empty list) supplies the message and finish reason;
    without a message, a dict response's ``content`` / ``output_text`` is used
    as the assistant content.
    """
    payload = _jsonable(response)
    payload_is_dict = isinstance(payload, dict)
    if payload_is_dict and "assistant_message" in payload:
        return payload
    fallback = payload if payload_is_dict else {}

    choices = _value(response, "choices")
    if choices is None and payload_is_dict:
        choices = payload.get("choices")
    first_choice = None
    if isinstance(choices, list) and choices:
        first_choice = choices[0]
    message = _value(first_choice, "message")
    finish_reason = _value(first_choice, "finish_reason")

    assistant_message: dict[str, Any] = {"role": "assistant", "content": ""}
    if message is not None:
        assistant_message["role"] = _value(message, "role", "assistant") or "assistant"
        content = _value(message, "content")
        if content is not None:
            assistant_message["content"] = _jsonable(content)
        normalized_calls = _tool_calls_payload(_value(message, "tool_calls"))
        if normalized_calls:
            assistant_message["tool_calls"] = normalized_calls
        reasoning = _value(message, "reasoning_content")
        if reasoning is not None:
            assistant_message["reasoning_content"] = _jsonable(reasoning)
    elif payload_is_dict:
        assistant_message["content"] = payload.get("content") or payload.get("output_text") or ""

    return {
        "model": _value(response, "model", fallback.get("model")),
        "assistant_message": assistant_message,
        "finish_reason": finish_reason,
        "usage": _jsonable(_value(response, "usage", fallback.get("usage"))),
    }
|
|
|
|
|
|
def _tool_calls_payload(tool_calls: Any) -> list[dict[str, Any]]:
    """Normalize a list of tool calls into plain ``id`` / ``type`` / ``function`` dicts.

    Anything that is not a list yields ``[]``. A missing or falsy ``type``
    becomes ``"function"``.
    """
    if not isinstance(tool_calls, list):
        return []

    def _normalize(call: Any) -> dict[str, Any]:
        function = _value(call, "function")
        return {
            "id": _value(call, "id"),
            "type": _value(call, "type", "function") or "function",
            "function": {
                "name": _value(function, "name"),
                "arguments": _value(function, "arguments"),
            },
        }

    return [_normalize(call) for call in tool_calls]
|
|
|
|
|
|
def _safe(fn) -> None:
|
|
try:
|
|
fn()
|
|
except Exception as exc:
|
|
logger.debug("NeMo Relay hook handling failed: %s", exc, exc_info=True)
|
|
|
|
|
|
def _resolve_awaitable(value: Any) -> Any:
|
|
if not inspect.isawaitable(value):
|
|
return value
|
|
try:
|
|
asyncio.get_running_loop()
|
|
except RuntimeError:
|
|
return asyncio.run(value)
|
|
|
|
result: dict[str, Any] = {}
|
|
error: dict[str, BaseException] = {}
|
|
|
|
def _runner() -> None:
|
|
try:
|
|
result["value"] = asyncio.run(value)
|
|
except BaseException as exc: # pragma: no cover - re-raised below
|
|
error["exc"] = exc
|
|
|
|
thread = threading.Thread(
|
|
target=_runner,
|
|
name="hermes-nemo-relay-awaitable",
|
|
daemon=True,
|
|
)
|
|
thread.start()
|
|
thread.join()
|
|
if "exc" in error:
|
|
raise error["exc"]
|
|
return result.get("value")
|
|
|
|
|
|
def reset_for_tests() -> None:
    """Forget the cached runtime (or cached failure).

    The next ``_get_runtime()`` call re-runs the import and initialization.
    Sessions held by the previous runtime are not closed.
    """
    global _RUNTIME
    with _LOCK:
        _RUNTIME = None
|