fix(honcho): move context injection from system prompt to prefetch, fix honcho_context crash

- system_prompt_block() now returns a static header only (matching the ABC contract).
  All other providers already did this; Honcho was the only one baking live
  user data into the system prompt, freezing it on turn 1 forever

- prefetch() assembles two layers:
  Layer 1: base context (representation + card) from peer.context(), cached
  and refreshed on context_cadence (not frozen).
  Layer 2: dialectic supplement, refreshed on dialectic_cadence

- Context and dialectic cadence now checked independently in queue_prefetch()
  Previously context refresh was gated behind dialectic cadence

- Fix honcho_context tool crash: Honcho SDK Message objects use .peer_id
  not .role — was silently returning 'No context available yet' due to
  AttributeError caught by broad except
This commit is contained in:
Erosika 2026-04-14 18:41:23 -04:00
parent af5bbda7d2
commit ef7f31562b
2 changed files with 64 additions and 36 deletions

View file

@ -196,9 +196,9 @@ class HonchoMemoryProvider(MemoryProvider):
# B1: recall_mode — set during initialize from config
self._recall_mode = "hybrid" # "context", "tools", or "hybrid"
# B4: First-turn context baking
self._first_turn_context: Optional[str] = None
self._first_turn_lock = threading.Lock()
# Base context cache — refreshed on context_cadence, not frozen
self._base_context_cache: Optional[str] = None
self._base_context_lock = threading.Lock()
# B5: Cost-awareness turn counting and cadence
self._turn_count = 0
@ -447,9 +447,9 @@ class HonchoMemoryProvider(MemoryProvider):
def system_prompt_block(self) -> str:
"""Return system prompt text, adapted by recall_mode.
B4: On the FIRST call, fetch and bake the full Honcho context
(user representation, peer card, AI representation, continuity synthesis).
Subsequent calls return the cached block for prompt caching stability.
Returns only the mode header and tool instructions static text
that doesn't change between turns (prompt-cache friendly).
Live context (representation, card) is injected via prefetch().
"""
if self._cron_skipped:
return ""
@ -463,20 +463,6 @@ class HonchoMemoryProvider(MemoryProvider):
)
return ""
# ----- B4: First-turn context baking -----
first_turn_block = ""
if self._recall_mode in ("context", "hybrid"):
with self._first_turn_lock:
if self._first_turn_context is None:
# First call — fetch and cache
try:
ctx = self._manager.get_prefetch_context(self._session_key)
self._first_turn_context = self._format_first_turn_context(ctx) if ctx else ""
except Exception as e:
logger.debug("Honcho first-turn context fetch failed: %s", e)
self._first_turn_context = ""
first_turn_block = self._first_turn_context
# ----- B1: adapt text based on recall_mode -----
if self._recall_mode == "context":
header = (
@ -504,12 +490,14 @@ class HonchoMemoryProvider(MemoryProvider):
"honcho_conclude to save facts about the user."
)
if first_turn_block:
return f"{header}\n\n{first_turn_block}"
return header
def prefetch(self, query: str, *, session_id: str = "") -> str:
"""Return prefetched dialectic context from background thread.
"""Return base context (representation + card) plus dialectic supplement.
Assembles two layers:
1. Base context from peer.context() cached, refreshed on context_cadence
2. Dialectic supplement cached, refreshed on dialectic_cadence
B1: Returns empty when recall_mode is "tools" (no injection).
B5: Respects injection_frequency "first-turn" returns cached/empty after turn 0.
@ -526,14 +514,51 @@ class HonchoMemoryProvider(MemoryProvider):
if self._injection_frequency == "first-turn" and self._turn_count > 0:
return ""
parts = []
# ----- Layer 1: Base context (representation + card) -----
# On first call, fetch synchronously so turn 1 isn't empty.
# After that, serve from cache and refresh in background on cadence.
with self._base_context_lock:
if self._base_context_cache is None:
# First call — synchronous fetch
try:
ctx = self._manager.get_prefetch_context(self._session_key)
self._base_context_cache = self._format_first_turn_context(ctx) if ctx else ""
self._last_context_turn = self._turn_count
except Exception as e:
logger.debug("Honcho base context fetch failed: %s", e)
self._base_context_cache = ""
base_context = self._base_context_cache
# Check if background context prefetch has a fresher result
if self._manager:
fresh_ctx = self._manager.pop_context_result(self._session_key)
if fresh_ctx:
formatted = self._format_first_turn_context(fresh_ctx)
if formatted:
with self._base_context_lock:
self._base_context_cache = formatted
base_context = formatted
if base_context:
parts.append(base_context)
# ----- Layer 2: Dialectic supplement -----
if self._prefetch_thread and self._prefetch_thread.is_alive():
self._prefetch_thread.join(timeout=3.0)
with self._prefetch_lock:
result = self._prefetch_result
dialectic_result = self._prefetch_result
self._prefetch_result = ""
if not result:
if dialectic_result and dialectic_result.strip():
parts.append(dialectic_result)
if not parts:
return ""
result = "\n\n".join(parts)
# ----- Port #3265: token budget enforcement -----
result = self._truncate_to_budget(result)
@ -554,9 +579,11 @@ class HonchoMemoryProvider(MemoryProvider):
return truncated + ""
def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
"""Fire a background dialectic query for the upcoming turn.
"""Fire background prefetch threads for the upcoming turn.
B5: Checks cadence before firing background threads.
B5: Checks cadence independently for dialectic and context refresh.
Context refresh updates the base layer (representation + card).
Dialectic fires the LLM reasoning supplement.
"""
if self._cron_skipped:
return
@ -567,6 +594,15 @@ class HonchoMemoryProvider(MemoryProvider):
if self._recall_mode == "tools":
return
# ----- Context refresh (base layer) — independent cadence -----
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
self._last_context_turn = self._turn_count
try:
self._manager.prefetch_context(self._session_key, query)
except Exception as e:
logger.debug("Honcho context prefetch failed: %s", e)
# ----- Dialectic prefetch (supplement layer) -----
# B5: cadence check — skip if too soon since last dialectic call
if self._dialectic_cadence > 1:
if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
@ -592,14 +628,6 @@ class HonchoMemoryProvider(MemoryProvider):
)
self._prefetch_thread.start()
# Also fire context prefetch if cadence allows
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
self._last_context_turn = self._turn_count
try:
self._manager.prefetch_context(self._session_key, query)
except Exception as e:
logger.debug("Honcho context prefetch failed: %s", e)
def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
"""Track turn count for cadence and injection_frequency logic."""
self._turn_count = turn_number

View file

@ -942,7 +942,7 @@ class HonchoSessionManager:
if ctx.messages:
recent = ctx.messages[-10:] # last 10 messages
result["recent_messages"] = [
{"role": m.role, "content": m.content[:500]}
{"role": getattr(m, "peer_id", "unknown"), "content": (m.content or "")[:500]}
for m in recent
]