diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index b9a415c1d..a42786304 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -206,11 +206,11 @@ PLATFORM_HINTS = {
         "contextually appropriate."
     ),
     "cron": (
-        "You are running as a scheduled cron job. Your final response is automatically "
-        "delivered to the job's configured destination, so do not use send_message to "
-        "send to that same target again. If you want the user to receive something in "
-        "the scheduled destination, put it directly in your final response. Use "
-        "send_message only for additional or different targets."
+        "You are running as a scheduled cron job. There is no user present — you "
+        "cannot ask questions, request clarification, or wait for follow-up. Execute "
+        "the task fully and autonomously, making reasonable decisions where needed. "
+        "Your final response is automatically delivered to the job's configured "
+        "destination — put the primary content directly in your response."
     ),
     "cli": (
         "You are a CLI AI Agent. Try not to use markdown but simple text "
diff --git a/cli.py b/cli.py
index ccc1e0d68..af8ac4efc 100755
--- a/cli.py
+++ b/cli.py
@@ -3517,8 +3517,17 @@ class HermesCLI:
                 # Parse provider:model syntax (e.g. "openrouter:anthropic/claude-sonnet-4.5")
                 current_provider = self.provider or self.requested_provider or "openrouter"
                 target_provider, new_model = parse_model_input(raw_input, current_provider)
-                # Auto-detect provider when no explicit provider:model syntax was used
-                if target_provider == current_provider:
+                # Auto-detect provider when no explicit provider:model syntax was used.
+                # Skip auto-detection for custom providers — the model name might
+                # coincidentally match a known provider's catalog, but the user
+                # intends to use it on their custom endpoint.  Require explicit
+                # provider:model syntax (e.g. /model openai-codex:gpt-5.2-codex)
+                # to switch away from a custom endpoint.
+                _base = self.base_url or ""
+                is_custom = current_provider == "custom" or (
+                    "localhost" in _base or "127.0.0.1" in _base
+                )
+                if target_provider == current_provider and not is_custom:
                     from hermes_cli.models import detect_provider_for_model
                     detected = detect_provider_for_model(new_model, current_provider)
                     if detected:
@@ -3586,6 +3595,13 @@ class HermesCLI:
                         if message:
                             print(f"  Reason: {message}")
                         print("  Note: Model will revert on restart. Use a verified model to save to config.")
+
+                    # Hint shown only when the switch stayed on a custom endpoint.
+                    if is_custom and not provider_changed:
+                        endpoint = self.base_url or "custom endpoint"
+                        print(f"  Endpoint: {endpoint}")
+                        print("  Tip: To switch providers, use /model provider:model")
+                        print("       e.g. /model openai-codex:gpt-5.2-codex")
             else:
                 self._show_model_and_providers()
         elif canonical == "provider":
diff --git a/cron/scheduler.py b/cron/scheduler.py
index e996df079..417c3eb43 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -391,7 +391,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
             providers_ignored=pr.get("ignore"),
             providers_order=pr.get("order"),
             provider_sort=pr.get("sort"),
-            disabled_toolsets=["cronjob"],
+            disabled_toolsets=["cronjob", "messaging", "clarify"],
             quiet_mode=True,
             platform="cron",
             session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}",
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index d2a7693ac..086acfa2b 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -670,6 +670,11 @@ OPTIONAL_ENV_VARS = {
         "password": True,
         "category": "tool",
     },
+    "HONCHO_BASE_URL": {
+        "description": "Base URL for self-hosted Honcho instances (no API key needed)",
+        "prompt": "Honcho base URL (e.g. http://localhost:8000)",
+        "category": "tool",
+    },
 
     # ── Messaging platforms ──
     "TELEGRAM_BOT_TOKEN": {
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index b00db5cf1..8c2979b6b 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -24,6 +24,18 @@ def _normalize_custom_provider_name(value: str) -> str:
     return value.strip().lower().replace(" ", "-")
 
 
+def _detect_api_mode_for_url(base_url: Optional[str]) -> Optional[str]:
+    """Return the api_mode implied by a resolved base URL, or None if none is.
+
+    Direct api.openai.com endpoints (not via openrouter) get "codex_responses":
+    GPT-5.x tool calls with reasoning get a 400 from chat/completions there.
+    """
+    url = (base_url or "").lower()  # case-insensitive; None/"" match nothing
+    if "api.openai.com" in url and "openrouter" not in url:
+        return "codex_responses"
+    return None
+
+
 def _auto_detect_local_model(base_url: str) -> str:
     """Query a local server for its model name when only one model is loaded."""
     if not base_url:
@@ -185,7 +197,9 @@ def _resolve_named_custom_runtime(
 
     return {
         "provider": "openrouter",
-        "api_mode": custom_provider.get("api_mode", "chat_completions"),
+        "api_mode": custom_provider.get("api_mode")
+        or _detect_api_mode_for_url(base_url)
+        or "chat_completions",
         "base_url": base_url,
         "api_key": api_key,
         "source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
@@ -263,7 +277,9 @@ def _resolve_openrouter_runtime(
 
     return {
         "provider": "openrouter",
-        "api_mode": _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions",
+        "api_mode": _parse_api_mode(model_cfg.get("api_mode"))
+        or _detect_api_mode_for_url(base_url)
+        or "chat_completions",
         "base_url": base_url,
         "api_key": api_key,
         "source": source,
diff --git a/honcho_integration/client.py b/honcho_integration/client.py
index 759576ada..4411241ad 100644
--- a/honcho_integration/client.py
+++ b/honcho_integration/client.py
@@ -117,11 +117,13 @@ class HonchoClientConfig:
     def from_env(cls, workspace_id: str = "hermes") -> HonchoClientConfig:
         """Create config from environment variables (fallback)."""
         api_key = os.environ.get("HONCHO_API_KEY")
+        base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
         return cls(
             workspace_id=workspace_id,
             api_key=api_key,
             environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
-            enabled=bool(api_key),
+            base_url=base_url,
+            enabled=bool(api_key or base_url),
         )
 
     @classmethod
@@ -171,8 +173,14 @@ class HonchoClientConfig:
             or raw.get("environment", "production")
         )
 
-        # Auto-enable when API key is present (unless explicitly disabled)
-        # Host-level enabled wins, then root-level, then auto-enable if key exists.
+        base_url = (
+            raw.get("baseUrl")
+            or os.environ.get("HONCHO_BASE_URL", "").strip()
+            or None
+        )
+
+        # Auto-enable when API key or base_url is present (unless explicitly disabled)
+        # Host-level enabled wins, then root-level, then auto-enable if key/url exists.
         host_enabled = host_block.get("enabled")
         root_enabled = raw.get("enabled")
         if host_enabled is not None:
@@ -180,8 +188,8 @@ class HonchoClientConfig:
         elif root_enabled is not None:
             enabled = root_enabled
         else:
-            # Not explicitly set anywhere -> auto-enable if API key exists
-            enabled = bool(api_key)
+            # Not explicitly set anywhere -> auto-enable if API key or base_url exists
+            enabled = bool(api_key or base_url)
 
         # write_frequency: accept int or string
         raw_wf = (
@@ -214,6 +222,7 @@ class HonchoClientConfig:
             workspace_id=workspace,
             api_key=api_key,
             environment=environment,
+            base_url=base_url,
             peer_name=host_block.get("peerName") or raw.get("peerName"),
             ai_peer=ai_peer,
             linked_hosts=linked_hosts,
@@ -348,11 +357,12 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
     if config is None:
         config = HonchoClientConfig.from_global_config()
 
-    if not config.api_key:
+    if not config.api_key and not config.base_url:
         raise ValueError(
             "Honcho API key not found. "
             "Get your API key at https://app.honcho.dev, "
-            "then run 'hermes honcho setup' or set HONCHO_API_KEY."
+            "then run 'hermes honcho setup' or set HONCHO_API_KEY. "
+            "For local instances, set HONCHO_BASE_URL instead."
         )
 
     try:
diff --git a/run_agent.py b/run_agent.py
index e6abda1e2..c6a616c2d 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -501,6 +501,12 @@ class AIAgent:
         else:
             self.api_mode = "chat_completions"
 
+        # Direct OpenAI sessions use the Responses API path.  GPT-5.x tool
+        # calls with reasoning are rejected on /v1/chat/completions, and
+        # Hermes is a tool-using client by default.
+        if self.api_mode == "chat_completions" and self._is_direct_openai_url():
+            self.api_mode = "codex_responses"
+
         # Pre-warm OpenRouter model metadata cache in a background thread.
         # fetch_model_metadata() is cached for 1 hour; this avoids a blocking
         # HTTP request on the first API response when pricing is estimated.
@@ -1057,6 +1063,9 @@ class AIAgent:
         if hasattr(self, "context_compressor") and self.context_compressor:
             self.context_compressor.last_prompt_tokens = 0
             self.context_compressor.last_completion_tokens = 0
+            self.context_compressor.last_total_tokens = 0
+            self.context_compressor.compression_count = 0
+            self.context_compressor._context_probed = False
     
     @staticmethod
     def _safe_print(*args, **kwargs):
@@ -1085,6 +1094,11 @@ class AIAgent:
             return
         self._safe_print(*args, **kwargs)
 
+    def _is_direct_openai_url(self, base_url: str = None) -> bool:
+        """Check for a direct OpenAI URL; defaults to this agent's own base URL."""
+        target = (base_url or self._base_url_lower).lower()
+        return "openrouter" not in target and "api.openai.com" in target
+
     def _max_tokens_param(self, value: int) -> dict:
         """Return the correct max tokens kwarg for the current provider.
         
@@ -1092,11 +1106,7 @@ class AIAgent:
         'max_completion_tokens'. OpenRouter, local models, and older
         OpenAI models use 'max_tokens'.
         """
-        _is_direct_openai = (
-            "api.openai.com" in self._base_url_lower
-            and "openrouter" not in self._base_url_lower
-        )
-        if _is_direct_openai:
+        if self._is_direct_openai_url():
             return {"max_completion_tokens": value}
         return {"max_tokens": value}
 
@@ -3558,13 +3568,15 @@ class AIAgent:
                     fb_provider)
                 return False
 
-            # Determine api_mode from provider
+            # Determine api_mode from provider / base URL
             fb_api_mode = "chat_completions"
             fb_base_url = str(fb_client.base_url)
             if fb_provider == "openai-codex":
                 fb_api_mode = "codex_responses"
             elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
                 fb_api_mode = "anthropic_messages"
+            elif self._is_direct_openai_url(fb_base_url):
+                fb_api_mode = "codex_responses"
 
             old_model = self.model
             self.model = fb_model
diff --git a/tests/honcho_integration/test_client.py b/tests/honcho_integration/test_client.py
index b1ae29c54..a9a837e62 100644
--- a/tests/honcho_integration/test_client.py
+++ b/tests/honcho_integration/test_client.py
@@ -60,6 +60,21 @@ class TestFromEnv:
         config = HonchoClientConfig.from_env(workspace_id="custom")
         assert config.workspace_id == "custom"
 
+    def test_reads_base_url_from_env(self):
+        """HONCHO_BASE_URL is copied into the config and enables the integration."""
+        with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False):
+            cfg = HonchoClientConfig.from_env()
+        assert cfg.base_url == "http://localhost:8000" and cfg.enabled is True
+
+    def test_enabled_without_api_key_when_base_url_set(self):
+        """A config with only a base URL (api_key is None) still reports enabled."""
+        env = {"HONCHO_BASE_URL": "http://localhost:8000"}
+        with patch.dict(os.environ, env, clear=False):
+            os.environ.pop("HONCHO_API_KEY", None)
+            cfg = HonchoClientConfig.from_env()
+        assert cfg.api_key is None
+        assert cfg.base_url == env["HONCHO_BASE_URL"] and cfg.enabled is True
+
 
 class TestFromGlobalConfig:
     def test_missing_config_falls_back_to_env(self, tmp_path):
@@ -188,6 +203,36 @@ class TestFromGlobalConfig:
             config = HonchoClientConfig.from_global_config(config_path=config_file)
         assert config.api_key == "env-key"
 
+    def test_base_url_env_fallback(self, tmp_path):
+        """With no baseUrl key in the JSON, the env var supplies base_url."""
+        env = {"HONCHO_BASE_URL": "http://localhost:8000"}
+        cfg_path = tmp_path / "config.json"
+        cfg_path.write_text(json.dumps({"workspace": "local"}))
+        with patch.dict(os.environ, env, clear=False):
+            loaded = HonchoClientConfig.from_global_config(config_path=cfg_path)
+        assert loaded.base_url == env["HONCHO_BASE_URL"]
+        assert loaded.enabled is True
+
+    def test_base_url_from_config_root(self, tmp_path):
+        """A root-level baseUrl in the JSON wins over HONCHO_BASE_URL."""
+        from_file, from_env = "http://config-host:9000", "http://localhost:8000"
+        cfg_path = tmp_path / "config.json"
+        cfg_path.write_text(json.dumps({"baseUrl": from_file}))
+        with patch.dict(os.environ, {"HONCHO_BASE_URL": from_env}, clear=False):
+            loaded = HonchoClientConfig.from_global_config(config_path=cfg_path)
+        assert loaded.base_url == from_file
+
+    def test_base_url_not_read_from_host_block(self, tmp_path):
+        """A baseUrl nested under hosts.hermes does not override the root one."""
+        raw = {
+            "baseUrl": "http://root:9000",
+            "hosts": {"hermes": {"baseUrl": "http://host-block:9001"}},
+        }
+        cfg_path = tmp_path / "config.json"
+        cfg_path.write_text(json.dumps(raw))
+        loaded = HonchoClientConfig.from_global_config(config_path=cfg_path)
+        assert loaded.base_url == raw["baseUrl"]
+
 
 class TestResolveSessionName:
     def test_manual_override(self):
diff --git a/tests/test_cli_init.py b/tests/test_cli_init.py
index 5ebd301ed..f41f81bb8 100644
--- a/tests/test_cli_init.py
+++ b/tests/test_cli_init.py
@@ -42,6 +42,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
         "prompt_toolkit.key_binding": MagicMock(),
         "prompt_toolkit.completion": MagicMock(),
         "prompt_toolkit.formatted_text": MagicMock(),
+        "prompt_toolkit.auto_suggest": MagicMock(),
     }
     with patch.dict(sys.modules, prompt_toolkit_stubs), \
          patch.dict("os.environ", clean_env, clear=False):
diff --git a/tests/test_cli_new_session.py b/tests/test_cli_new_session.py
index 7fed48e40..0490aad9c 100644
--- a/tests/test_cli_new_session.py
+++ b/tests/test_cli_new_session.py
@@ -12,6 +12,17 @@ from hermes_state import SessionDB
 from tools.todo_tool import TodoStore
 
 
+class _FakeCompressor:
+    """Test double for ContextCompressor with non-zero starting counters."""
+
+    def __init__(self):
+        # Every value is non-zero/True so a reset to 0/False is detectable.
+        self.last_prompt_tokens, self.last_completion_tokens = 500, 200
+        self.last_total_tokens = 700
+        self.compression_count = 3
+        self._context_probed = True
+
+
 class _FakeAgent:
     def __init__(self, session_id: str, session_start):
         self.session_id = session_id
@@ -25,6 +36,42 @@ class _FakeAgent:
         self.flush_memories = MagicMock()
         self._invalidate_system_prompt = MagicMock()
 
+        # Token counters (non-zero to verify reset)
+        self.session_total_tokens = 1000
+        self.session_input_tokens = 600
+        self.session_output_tokens = 400
+        self.session_prompt_tokens = 550
+        self.session_completion_tokens = 350
+        self.session_cache_read_tokens = 100
+        self.session_cache_write_tokens = 50
+        self.session_reasoning_tokens = 80
+        self.session_api_calls = 5
+        self.session_estimated_cost_usd = 0.42
+        self.session_cost_status = "estimated"
+        self.session_cost_source = "openrouter"
+        self.context_compressor = _FakeCompressor()
+
+    def reset_session_state(self):
+        """Stand-in for AIAgent.reset_session_state(): zero all usage counters."""
+        for token_field in (
+            "session_total_tokens",
+            "session_input_tokens",
+            "session_output_tokens",
+            "session_prompt_tokens",
+            "session_completion_tokens",
+            "session_cache_read_tokens",
+            "session_cache_write_tokens",
+            "session_reasoning_tokens",
+        ):
+            setattr(self, token_field, 0)
+        self.session_api_calls, self.session_estimated_cost_usd = 0, 0.0
+        self.session_cost_status, self.session_cost_source = "unknown", "none"
+        comp = getattr(self, "context_compressor", None)
+        if comp:
+            comp.last_prompt_tokens = comp.last_completion_tokens = 0
+            comp.last_total_tokens = comp.compression_count = 0
+            comp._context_probed = False
+
 
 def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
     """Create a HermesCLI instance with minimal mocking."""
@@ -58,6 +105,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
         "prompt_toolkit.key_binding": MagicMock(),
         "prompt_toolkit.completion": MagicMock(),
         "prompt_toolkit.formatted_text": MagicMock(),
+        "prompt_toolkit.auto_suggest": MagicMock(),
     }
     with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict(
         "os.environ", clean_env, clear=False
@@ -137,3 +185,38 @@ def test_clear_command_starts_new_session_before_redrawing(tmp_path):
     cli.console.clear.assert_called_once()
     cli.show_banner.assert_called_once()
     assert cli.conversation_history == []
+
+
+def test_new_session_resets_token_counters(tmp_path):
+    """Regression test for #2099: /new leaves every usage counter zeroed."""
+    cli = _prepare_cli_with_active_session(tmp_path)
+    agent = cli.agent
+
+    # Precondition: the fixture must start with usage already recorded.
+    assert agent.session_total_tokens > 0
+    assert agent.session_api_calls > 0
+    assert agent.context_compressor.compression_count > 0
+
+    cli.process_command("/new")
+
+    for counter in (
+        "session_total_tokens",
+        "session_input_tokens",
+        "session_output_tokens",
+        "session_prompt_tokens",
+        "session_completion_tokens",
+        "session_cache_read_tokens",
+        "session_cache_write_tokens",
+        "session_reasoning_tokens",
+        "session_api_calls",
+    ):
+        assert getattr(agent, counter) == 0, counter
+    assert agent.session_estimated_cost_usd == 0.0
+    assert agent.session_cost_status == "unknown"
+    assert agent.session_cost_source == "none"
+
+    # Re-read the compressor after /new so the live object is what gets checked.
+    comp = agent.context_compressor
+    assert comp.last_prompt_tokens == 0 and comp.last_completion_tokens == 0
+    assert comp.last_total_tokens == 0 and comp.compression_count == 0
+    assert comp._context_probed is False
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index 74b958a56..0cb1dc19d 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -336,11 +336,9 @@ Jobs run in a fresh session with no current-chat context, so prompts must be sel
 If skill or skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
 On update, passing skills=[] clears attached skills.
 
-NOTE: The agent's final response is auto-delivered to the target — do NOT use
-send_message in the prompt for that same destination. Same-target send_message
-calls are skipped to avoid duplicate cron deliveries. Put the primary
-user-facing content in the final response, and use send_message only for
-additional or different targets.
+NOTE: The agent's final response is auto-delivered to the target. Put the primary
+user-facing content in the final response. Cron jobs run autonomously with no user
+present — they cannot ask questions or request clarification.
 
 Important safety rule: cron-run sessions should not recursively schedule more cron jobs.""",
     "parameters": {