diff --git a/agent/moa_loop.py b/agent/moa_loop.py
index ed566325eeb..5fe984e7fd2 100644
--- a/agent/moa_loop.py
+++ b/agent/moa_loop.py
@@ -26,6 +26,33 @@ logger = logging.getLogger(__name__)
 # opening dozens of sockets at once.
 _MAX_REFERENCE_WORKERS = 8
 
+# System prompt prepended to every reference-model call. References are
+# advisory — they do NOT act, call tools, or own the task. Without this
+# framing a reference receives the bare trimmed conversation and assumes it is
+# the acting agent: it then refuses ("I can't access repositories / URLs from
+# here") or tries to call tools it doesn't have. The prompt reframes the model
+# as an analyst whose job is to reason about the presented state and hand its
+# best thinking to the aggregator/orchestrator that will actually act.
+_REFERENCE_SYSTEM_PROMPT = (
+    "You are a reference advisor in a Mixture of Agents (MoA) process. You are "
+    "NOT the acting agent and you do NOT execute anything: you cannot call "
+    "tools, run commands, browse, or access files, repositories, or URLs, and "
+    "you should not try to or apologize for being unable to. A separate "
+    "aggregator/orchestrator model holds those capabilities and will take the "
+    "actual actions.\n\n"
+    "The conversation below is the current state of a task handled by that "
+    "acting agent. Your job is to give your most intelligent analysis of that "
+    "state: understand the goal, reason about the problem, and advise on what "
+    "to do next. Surface the best approach, concrete next steps and tool-use "
+    "strategy, likely pitfalls and risks, and anything the acting agent may "
+    "have missed or gotten wrong. Assume any referenced files, URLs, or "
+    "systems exist and reason about them from the context given rather than "
+    "asking for access.\n\n"
+    "Respond with your advice directly — no preamble, no disclaimers about "
+    "tools or access. Your response is private guidance handed to the "
+    "aggregator, not an answer shown to the user."
+)
+
 
 def _slot_label(slot: dict[str, str]) -> str:
     return f"{slot.get('provider', '').strip()}:{slot.get('model', '').strip()}"
@@ -100,9 +128,14 @@ def _run_reference(
     """
     label = _slot_label(slot)
     try:
+        # Prepend the advisory-role system prompt so the reference understands
+        # it is analyzing state for an aggregator, not acting on the task. The
+        # trimmed view (_reference_messages) already strips the agent's own
+        # system prompt, so this is the only system message the reference sees.
+        messages = [{"role": "system", "content": _REFERENCE_SYSTEM_PROMPT}, *ref_messages]
         response = call_llm(
             task="moa_reference",
-            messages=ref_messages,
+            messages=messages,
             temperature=temperature,
             max_tokens=max_tokens,
             **_slot_runtime(slot),
@@ -169,6 +202,17 @@ def _reference_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
     that reject orphan ``tool`` messages or ``tool_calls`` the reference never
     produced. We keep only the user/assistant *text* turns, dropping the
     system prompt, any ``tool``-role messages, and any ``tool_calls`` payloads.
+
+    The trimmed view MUST end with a ``user`` turn. An advisory reference call
+    answers the latest user input; it must never end with an ``assistant``
+    turn, which Anthropic (and OpenRouter→Anthropic) interpret as an assistant
+    *prefill* the model should continue — some models (e.g. Claude Opus 4.8)
+    reject prefill outright with ``400 ... must end with a user message``. This
+    is the common mid-tool-loop case: the last assistant turn carried
+    interleaved reasoning text plus tool calls, so its text survives the trim
+    while the following ``tool`` result is dropped, leaving a trailing
+    assistant turn. We strip any trailing assistant turns so the reference sees
+    a user message last.
     """
     trimmed: list[dict[str, Any]] = []
     for msg in messages:
@@ -186,9 +230,14 @@ def _reference_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
             # Assistant turn that was purely tool calls — nothing advisory.
             continue
         trimmed.append({"role": role, "content": text})
+    # Advisory calls must end on a user turn (no assistant prefill). Drop any
+    # trailing assistant turns left by the tool-loop trim above.
+    while trimmed and trimmed[-1].get("role") == "assistant":
+        trimmed.pop()
     if not trimmed:
-        # Degenerate case (e.g. first turn was stripped): fall back to a
-        # minimal user turn so the reference still has something to answer.
+        # Degenerate case (e.g. first turn was stripped, or the view trimmed
+        # down to assistant-only): fall back to the latest user turn so the
+        # reference still has something to answer.
         for msg in reversed(messages):
             if msg.get("role") == "user" and isinstance(msg.get("content"), str):
                 return [{"role": "user", "content": msg["content"]}]
diff --git a/tests/run_agent/test_moa_loop_mode.py b/tests/run_agent/test_moa_loop_mode.py
index c05dd3b267f..b07a8156281 100644
--- a/tests/run_agent/test_moa_loop_mode.py
+++ b/tests/run_agent/test_moa_loop_mode.py
@@ -236,12 +236,81 @@ def test_reference_messages_strips_system_and_tool_history():
     # System prompt, tool-call-only assistant turn, and tool result are gone.
     assert all(m["role"] in ("user", "assistant") for m in trimmed)
     assert all("tool_calls" not in m for m in trimmed)
+    # The advisory view must end on a user turn — a trailing assistant turn is
+    # treated by Anthropic as an assistant prefill (400 on no-prefill models).
+    # The only kept user turn here is the prompt, so the trailing assistant
+    # answer is stripped.
     assert trimmed == [
         {"role": "user", "content": "do the thing"},
-        {"role": "assistant", "content": "here is my answer"},
     ]
 
 
+def test_reference_messages_ends_with_user_not_assistant_prefill():
+    """The trimmed reference view always finishes on a ``user`` turn.
+
+    Mid-tool-loop the newest assistant message carries reasoning text plus
+    tool calls: the trim keeps that text but drops the ``tool`` result after
+    it. Anthropic (directly or via OpenRouter) would read the resulting
+    trailing assistant turn as a prefill, and no-prefill models answer with
+    ``400 ... must end with a user message``. Earlier assistant turns stay.
+    """
+    from agent.moa_loop import _reference_messages
+
+    tool_call = {"id": "c1", "function": {"name": "f", "arguments": "{}"}}
+    in_flight = {
+        "role": "assistant",
+        "content": "let me reason then call a tool",
+        "tool_calls": [tool_call],
+    }
+    history = [
+        {"role": "user", "content": "q1"},
+        {"role": "assistant", "content": "a1"},
+        {"role": "user", "content": "q2 current"},
+        in_flight,
+        {"role": "tool", "tool_call_id": "c1", "content": "tool result"},
+    ]
+
+    view = _reference_messages(history)
+
+    assert view, "advisory view should not be empty"
+    assert view[-1]["role"] == "user"
+    # The in-flight assistant turn is cut; earlier context is untouched.
+    assert view == [
+        {"role": "user", "content": "q1"},
+        {"role": "assistant", "content": "a1"},
+        {"role": "user", "content": "q2 current"},
+    ]
+
+
+def test_run_reference_prepends_advisory_system_prompt(monkeypatch):
+    """Each reference call gets the advisory-role system prompt first.
+
+    Without it the reference assumes it is the acting agent and refuses ("I
+    can't access repositories/URLs from here") or tries to call tools it
+    doesn't have. The system prompt reframes it as an analyst advising the
+    aggregator; the caller's transcript must follow it unchanged.
+    """
+    from agent.moa_loop import _REFERENCE_SYSTEM_PROMPT, _run_reference
+
+    captured = {}
+
+    def fake_call_llm(**kwargs):
+        captured.update(kwargs)
+        return _response("advice")
+
+    monkeypatch.setattr("agent.moa_loop.call_llm", fake_call_llm)
+
+    _label, text = _run_reference(
+        {"provider": "openai-codex", "model": "gpt-5.5"},
+        [{"role": "user", "content": "review this PR"}],
+    )
+
+    assert text == "advice"
+    msgs = captured["messages"]
+    assert msgs[0] == {"role": "system", "content": _REFERENCE_SYSTEM_PROMPT}
+    assert msgs[1:] == [{"role": "user", "content": "review this PR"}]
+
+
 def test_moa_facade_references_get_trimmed_messages(monkeypatch, tmp_path):
     home = tmp_path / ".hermes"
     home.mkdir()
@@ -282,8 +351,13 @@ moa:
     )
 
     ref_call = next(c for c in calls if c["task"] == "moa_reference")
-    # Reference never sees system prompt or tool-role messages.
-    assert all(m["role"] == "user" for m in ref_call["messages"])
+    # Reference gets the advisory-role system prompt first, then user turns
+    # only — never the agent's own system prompt or tool-role messages.
+    ref_msgs = ref_call["messages"]
+    assert ref_msgs[0]["role"] == "system"
+    assert "reference advisor" in ref_msgs[0]["content"].lower()
+    assert "huge hermes system prompt" not in ref_msgs[0]["content"]
+    assert all(m["role"] == "user" for m in ref_msgs[1:])
     assert ref_call.get("tools") in (None, [])
     # Aggregator still receives the original messages + tool schema.
     agg_call = next(c for c in calls if c["task"] == "moa_aggregator")