mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
slow completions over group_size 4, debugging added
This commit is contained in:
parent
5b82190460
commit
88286f6da3
3 changed files with 22 additions and 5 deletions
|
|
@ -353,17 +353,25 @@ class AtroposAgent:
|
||||||
if not wait_every_s or wait_every_s <= 0:
|
if not wait_every_s or wait_every_s <= 0:
|
||||||
return await managed.chat_completion(**chat_kwargs)
|
return await managed.chat_completion(**chat_kwargs)
|
||||||
|
|
||||||
|
# Heartbeat mode: wait in chunks without cancelling the underlying request.
|
||||||
|
# NOTE: do NOT use `asyncio.wait_for(task, timeout=...)` here, because a timeout
|
||||||
|
# will cancel the task and surface as `CancelledError` on the next loop.
|
||||||
task = asyncio.create_task(managed.chat_completion(**chat_kwargs))
|
task = asyncio.create_task(managed.chat_completion(**chat_kwargs))
|
||||||
t0 = time.perf_counter()
|
t0 = time.perf_counter()
|
||||||
while True:
|
|
||||||
try:
|
try:
|
||||||
return await asyncio.wait_for(task, timeout=wait_every_s)
|
while True:
|
||||||
except TimeoutError:
|
done, _pending = await asyncio.wait({task}, timeout=wait_every_s)
|
||||||
|
if task in done:
|
||||||
|
return task.result()
|
||||||
|
|
||||||
waited = time.perf_counter() - t0
|
waited = time.perf_counter() - t0
|
||||||
print(
|
print(
|
||||||
f"[AtroposAgent] step={step_num} still waiting for chat_completion... ({waited:.1f}s)",
|
f"[AtroposAgent] step={step_num} still waiting for chat_completion... ({waited:.1f}s)",
|
||||||
flush=True,
|
flush=True,
|
||||||
)
|
)
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
task.cancel()
|
||||||
|
raise
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if timeout_s and timeout_s > 0:
|
if timeout_s and timeout_s > 0:
|
||||||
|
|
|
||||||
|
|
@ -449,6 +449,8 @@ class AgentEnv(BaseEnv, ABC, Generic[AgentEnvConfigT]):
|
||||||
if len(items) != self.config.group_size:
|
if len(items) != self.config.group_size:
|
||||||
return None, backlog
|
return None, backlog
|
||||||
|
|
||||||
|
# TODO: Make sure logprobs are included
|
||||||
|
|
||||||
group: ScoredDataGroup = ScoredDataGroup(
|
group: ScoredDataGroup = ScoredDataGroup(
|
||||||
tokens=[],
|
tokens=[],
|
||||||
masks=[],
|
masks=[],
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,8 @@ This environment is intentionally minimal:
|
||||||
- runs an AtroposAgent tool loop to apply a fix
|
- runs an AtroposAgent tool loop to apply a fix
|
||||||
- verifies by running pytest nodeids from the dataset (reward = pass/fail)
|
- verifies by running pytest nodeids from the dataset (reward = pass/fail)
|
||||||
- Python only (no multi-language support currently, need to properly build & add to dropbox)
|
- Python only (no multi-language support currently, need to properly build & add to dropbox)
|
||||||
|
- TODO: Get the other nonpython sandboxes up and running, then add a config knob to switch between them per row
|
||||||
|
- oh and add to dockerhub
|
||||||
|
|
||||||
Dataset: NousResearch/SWE-smith-oracle (train; does NOT use SWE-bench eval set).
|
Dataset: NousResearch/SWE-smith-oracle (train; does NOT use SWE-bench eval set).
|
||||||
"""
|
"""
|
||||||
|
|
@ -185,6 +187,8 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
|
||||||
# The dataset "text" field can be extremely large (e.g. includes large code blobs
|
# The dataset "text" field can be extremely large (e.g. includes large code blobs
|
||||||
# and long test lists). In local dev and bring-up runs this can make the first LLM
|
# and long test lists). In local dev and bring-up runs this can make the first LLM
|
||||||
# call appear "hung" while the model chews through a massive prompt. Keep a cap.
|
# call appear "hung" while the model chews through a massive prompt. Keep a cap.
|
||||||
|
|
||||||
|
# TODO: Remove, smoke test only
|
||||||
def _cap(s: str, n: int) -> tuple[str, bool]:
|
def _cap(s: str, n: int) -> tuple[str, bool]:
|
||||||
if len(s) <= n:
|
if len(s) <= n:
|
||||||
return s, False
|
return s, False
|
||||||
|
|
@ -200,6 +204,7 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
|
||||||
|
|
||||||
repo_dir = self._repo_name(item)
|
repo_dir = self._repo_name(item)
|
||||||
verify_note = ""
|
verify_note = ""
|
||||||
|
# TODO: Remove, smoke testing only
|
||||||
if self.config.verification_mode == "install":
|
if self.config.verification_mode == "install":
|
||||||
verify_note = (
|
verify_note = (
|
||||||
"\nVerification for this run is INSTALL-ONLY:\n"
|
"\nVerification for this run is INSTALL-ONLY:\n"
|
||||||
|
|
@ -272,7 +277,9 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
|
||||||
|
|
||||||
# Prefer a lightweight "fetch by sha" to avoid pulling full history.
|
# Prefer a lightweight "fetch by sha" to avoid pulling full history.
|
||||||
# If it fails (some servers disallow fetching unadvertised objects, or we hit
|
# If it fails (some servers disallow fetching unadvertised objects, or we hit
|
||||||
# shallow-object edge cases), fall back to a full clone.
|
# shallow-object edge cases), fall back to a full clone
|
||||||
|
|
||||||
|
# TODO: tbh, should just do this before setting up worktree & after sandbox build
|
||||||
clone_attempts: list[tuple[str, str]] = []
|
clone_attempts: list[tuple[str, str]] = []
|
||||||
clone_attempts.append(
|
clone_attempts.append(
|
||||||
(
|
(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue