Track tool-call validity vs attempts; shape reward accordingly

- AgentResult now includes tool-call metrics: attempted, schema_valid,
  executed_ok, exec_error
- HermesAgentLoop normalizes args robustly without crashing, but
  distinguishes schema-valid args (dict) from coerced formats
  (stringified JSON, plain strings)
- SweSmithOracleEnv reward shaping now prefers schema-valid tool calls
  while still giving small credit for attempted tool use
This commit is contained in:
Shannon Sands 2026-02-14 09:17:05 +10:00
parent 35b2250b36
commit 499490d06a
2 changed files with 133 additions and 56 deletions

View file

@@ -488,19 +488,27 @@ class SweSmithOracleEnv(HermesAgentBaseEnv):
"""
repo_dir = self._repo_name(item)
# Count valid tool calls (assistant messages that have tool_calls)
tool_call_count = sum(
# Count tool calls. Prefer the agent-loop metrics if present:
# - attempted: model called a known tool name
# - schema_valid: args were a dict (no coercion/double-decoding)
fallback_count = sum(
len(msg.get("tool_calls", []))
for msg in result.messages
if msg.get("role") == "assistant"
)
if tool_call_count == 0:
attempted = getattr(result, "tool_calls_attempted", fallback_count)
schema_valid = getattr(result, "tool_calls_schema_valid", fallback_count)
if attempted == 0:
print(f"[SweSmithOracleEnv] No tool calls made; score=0.0", flush=True)
return 0.0
# Partial reward: 0.05 per tool call, capped at 0.3
tool_call_reward = min(tool_call_count * 0.05, 0.3)
# Shaping: reward attempting tool use a little, but reward schema-valid calls more.
# Full credit per call is still 0.05 when schema_valid.
attempt_reward = min(attempted * 0.02, 0.10)
schema_reward = min(schema_valid * 0.03, 0.20)
tool_call_reward = min(attempt_reward + schema_reward, 0.30)
nodeids = self._tests_for_item(item)
if not nodeids: