Merge pull request #29724 from bbednarski9/bbednarski/nmf-41B-nemoflow-plugin

feat(middleware): add adaptive middleware to hermes-agent, consumed by NeMo-Relay
This commit is contained in:
kshitij 2026-06-06 10:46:41 -07:00 committed by GitHub
commit d4a7bfd3aa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 2170 additions and 151 deletions

View file

@@ -1239,6 +1239,28 @@ def run_conversation(
_sanitize_structure_non_ascii(api_kwargs)
if agent.api_mode == "codex_responses":
api_kwargs = agent._get_transport().preflight_kwargs(api_kwargs, allow_stream=False)
try:
from hermes_cli.middleware import apply_llm_request_middleware
_llm_request_mw = apply_llm_request_middleware(
api_kwargs,
task_id=effective_task_id,
turn_id=turn_id,
api_request_id=api_request_id,
session_id=agent.session_id or "",
platform=agent.platform or "",
model=agent.model,
provider=agent.provider,
base_url=agent.base_url,
api_mode=agent.api_mode,
api_call_count=api_call_count,
)
api_kwargs = _llm_request_mw.payload
_original_api_kwargs = _llm_request_mw.original_payload
_llm_middleware_trace = _llm_request_mw.trace
except Exception:
_original_api_kwargs = dict(api_kwargs)
_llm_middleware_trace = []
try:
from hermes_cli.plugins import (
@@ -1291,6 +1313,7 @@ def run_conversation(
request_char_count=total_chars,
max_tokens=agent.max_tokens,
started_at=api_start_time,
middleware_trace=list(_llm_middleware_trace),
request=_request_payload,
)
except Exception:
@@ -1349,7 +1372,24 @@ def run_conversation(
)
return agent._interruptible_api_call(next_api_kwargs)
response = _perform_api_call(api_kwargs)
from hermes_cli.middleware import run_llm_execution_middleware
response = run_llm_execution_middleware(
api_kwargs,
_perform_api_call,
original_request=_original_api_kwargs,
task_id=effective_task_id,
turn_id=turn_id,
api_request_id=api_request_id,
session_id=agent.session_id or "",
platform=agent.platform or "",
model=agent.model,
provider=agent.provider,
base_url=agent.base_url,
api_mode=agent.api_mode,
api_call_count=api_call_count,
middleware_trace=list(_llm_middleware_trace),
)
api_duration = time.time() - api_start_time