From 7583aedacd53442d33d95acbed40c884ff90523d Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 5 Jun 2026 06:43:51 -0700
Subject: [PATCH] fix(completion): remove /model <arg> autocomplete from
 CLI/TUI (#39727)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: respect disabled auto-compaction on context overflow

Port from anomalyco/opencode#30749.

When compression.enabled is false, NO automatic compaction trigger may
fire. The proactive token-threshold paths (preflight + post-response
should_compress gate) already honoured the setting, but the three
provider-overflow recovery paths in the agent loop — long-context-tier
429, 413 payload-too-large, and context-overflow — called
_compress_context() unconditionally, silently compressing and rotating
the session against the user's explicit choice.

Add a single guard at the top of the overflow-recovery dispatch: when
compression is disabled and the error is one of those three overflow
classes, surface a terminal error (compaction_disabled: True) telling the
user to /compress manually, /new, switch to a larger-context model, or
reduce attachments. Manual /compress (force=True) is unaffected — it never
enters this loop.

Tests: new TestOverflowWithCompactionDisabled (413 + 400 overflow don't
compress when disabled; control case still compresses when enabled).
Existing overflow-recovery tests updated to enable compaction explicitly
(they verify the recovery fires); fixture defaults flipped to True to
match production (compression.enabled defaults to True).

* fix(completion): remove /model <arg> autocomplete from CLI/TUI

The TUI frontend already suppressed /model argument completion in favor of
the two-step ModelPicker (useCompletion.ts), but the CLI prompt_toolkit
completer and the gateway-backed complete.slash RPC (TUI + desktop) still
emitted model aliases and, for users with LM Studio configured, probed the
LM Studio server while typing (at most once per 30-second cache window).

Drops the /model branch in SlashCommandCompleter.get_completions, the
_model_completions method, and the LM Studio probe/cache helper that only
fed it. Command-name completion (/mod -> model) and sibling arg completers
(/skin, /personality) are untouched. Removes the now-dead TestModelTabCompletion
tests.
---
 hermes_cli/commands.py                     | 86 +---------------------
 tests/hermes_cli/test_ollama_cloud_auth.py | 66 -----------------
 2 files changed, 1 insertion(+), 151 deletions(-)
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 34e3af4014b..f022ef32b07 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -1148,41 +1148,6 @@ def slack_subcommand_map() -> dict[str, str]:
 # ---------------------------------------------------------------------------
 
 
-# Per-process cache for /model<space> LM Studio autocomplete. Probing on
-# every keystroke would block the UI; a short TTL keeps it live without
-# hammering the server.
-_LMSTUDIO_COMPLETION_CACHE: tuple[float, list[str]] | None = None
-
-
-def _lmstudio_completion_models() -> list[str]:
-    """Locally-loaded LM Studio models for /model autocomplete (cached, gated)."""
-    global _LMSTUDIO_COMPLETION_CACHE
-    # Gate: don't probe 127.0.0.1 on every keystroke for users who don't use LM Studio.
-    if not (os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL")):
-        try:
-            from hermes_cli.auth import _load_auth_store
-            store = _load_auth_store() or {}
-            if "lmstudio" not in (store.get("providers") or {}) \
-               and "lmstudio" not in (store.get("credential_pool") or {}):
-                return []
-        except Exception:
-            return []
-    now = time.time()
-    if _LMSTUDIO_COMPLETION_CACHE and (now - _LMSTUDIO_COMPLETION_CACHE[0]) < 30.0:
-        return _LMSTUDIO_COMPLETION_CACHE[1]
-    try:
-        from hermes_cli.models import fetch_lmstudio_models
-        models = fetch_lmstudio_models(
-            api_key=os.environ.get("LM_API_KEY", ""),
-            base_url=os.environ.get("LM_BASE_URL") or "http://127.0.0.1:1234/v1",
-            timeout=0.8,
-        )
-    except Exception:
-        models = []
-    _LMSTUDIO_COMPLETION_CACHE = (now, models)
-    return models
-
-
 class SlashCommandCompleter(Completer):
     """Autocomplete for built-in slash commands, subcommands, and skill commands."""
 
@@ -1599,52 +1564,6 @@ class SlashCommandCompleter(Completer):
         except Exception:
             pass
 
-    def _model_completions(self, sub_text: str, sub_lower: str):
-        """Yield completions for /model from config aliases + built-in aliases."""
-        seen = set()
-        # Config-based direct aliases (preferred — include provider info)
-        try:
-            from hermes_cli.model_switch import (
-                _ensure_direct_aliases, DIRECT_ALIASES, MODEL_ALIASES,
-            )
-            _ensure_direct_aliases()
-            for name, da in DIRECT_ALIASES.items():
-                if name.startswith(sub_lower) and name != sub_lower:
-                    seen.add(name)
-                    yield Completion(
-                        name,
-                        start_position=-len(sub_text),
-                        display=name,
-                        display_meta=f"{da.model} ({da.provider})",
-                    )
-            # Built-in catalog aliases not already covered
-            for name in sorted(MODEL_ALIASES.keys()):
-                if name in seen:
-                    continue
-                if name.startswith(sub_lower) and name != sub_lower:
-                    identity = MODEL_ALIASES[name]
-                    yield Completion(
-                        name,
-                        start_position=-len(sub_text),
-                        display=name,
-                        display_meta=f"{identity.vendor}/{identity.family}",
-                    )
-        except Exception:
-            pass
-        # LM Studio: surface locally-loaded models. Gated on the user actually
-        # having LM Studio configured (env var or auth-store entry) so we
-        # don't probe 127.0.0.1 on every keystroke for users who don't use it.
-        for name in _lmstudio_completion_models():
-            if name in seen:
-                continue
-            if name.startswith(sub_lower) and name != sub_lower:
-                yield Completion(
-                    name,
-                    start_position=-len(sub_text),
-                    display=name,
-                    display_meta="LM Studio",
-                )
-
     def get_completions(self, document, complete_event):
         text = document.text_before_cursor
         if not text.startswith("/"):
@@ -1668,9 +1587,6 @@ class SlashCommandCompleter(Completer):
 
             # Dynamic completions for commands with runtime lists
             if " " not in sub_text:
-                if base_cmd == "/model":
-                    yield from self._model_completions(sub_text, sub_lower)
-                    return
                 if base_cmd == "/skin":
                     yield from self._skin_completions(sub_text, sub_lower)
                     return
@@ -1788,7 +1704,7 @@ class SlashCommandAutoSuggest(AutoSuggest):
                     return Suggestion(cmd_name[len(word):])
             return None
 
-        # Command is complete — suggest subcommands or model names
+        # Command is complete — suggest subcommands
         sub_text = parts[1] if len(parts) > 1 else ""
         sub_lower = sub_text.lower()
 
diff --git a/tests/hermes_cli/test_ollama_cloud_auth.py b/tests/hermes_cli/test_ollama_cloud_auth.py
index 7e2dc5ff078..77b19c9bb1a 100644
--- a/tests/hermes_cli/test_ollama_cloud_auth.py
+++ b/tests/hermes_cli/test_ollama_cloud_auth.py
@@ -178,72 +178,6 @@ class TestModelSwitchPersistence:
         assert result.base_url == "https://api.anthropic.com"
 
 
-# ---------------------------------------------------------------------------
-# /model tab completion
-# ---------------------------------------------------------------------------
-
-class TestModelTabCompletion:
-    """SlashCommandCompleter provides model alias completions for /model."""
-
-    def test_model_completions_yields_direct_aliases(self, monkeypatch):
-        """_model_completions yields direct aliases with model and provider info."""
-        from hermes_cli.commands import SlashCommandCompleter
-        from hermes_cli.model_switch import DirectAlias
-        import hermes_cli.model_switch as ms
-
-        test_aliases = {
-            "opus": DirectAlias("claude-opus-4-6", "anthropic", ""),
-            "qwen": DirectAlias("qwen3.5:397b", "custom", "https://ollama.com/v1"),
-        }
-        monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases)
-
-        completer = SlashCommandCompleter()
-        completions = list(completer._model_completions("", ""))
-
-        names = [c.text for c in completions]
-        assert "opus" in names
-        assert "qwen" in names
-
-    def test_model_completions_filters_by_prefix(self, monkeypatch):
-        """Completions filter by typed prefix."""
-        from hermes_cli.commands import SlashCommandCompleter
-        from hermes_cli.model_switch import DirectAlias
-        import hermes_cli.model_switch as ms
-
-        test_aliases = {
-            "opus": DirectAlias("claude-opus-4-6", "anthropic", ""),
-            "qwen": DirectAlias("qwen3.5:397b", "custom", "https://ollama.com/v1"),
-        }
-        monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases)
-
-        completer = SlashCommandCompleter()
-        completions = list(completer._model_completions("o", "o"))
-
-        names = [c.text for c in completions]
-        assert "opus" in names
-        assert "qwen" not in names
-
-    def test_model_completions_shows_metadata(self, monkeypatch):
-        """Completions include model name and provider in display_meta."""
-        from hermes_cli.commands import SlashCommandCompleter
-        from hermes_cli.model_switch import DirectAlias
-        import hermes_cli.model_switch as ms
-
-        test_aliases = {
-            "glm": DirectAlias("glm-4.7", "custom", "https://ollama.com/v1"),
-        }
-        monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases)
-
-        completer = SlashCommandCompleter()
-        completions = list(completer._model_completions("g", "g"))
-
-        assert len(completions) >= 1
-        glm_comp = [c for c in completions if c.text == "glm"][0]
-        meta_str = str(glm_comp.display_meta)
-        assert "glm-4.7" in meta_str
-        assert "custom" in meta_str
-
-
 # ---------------------------------------------------------------------------
 # Fallback base_url passthrough
 # ---------------------------------------------------------------------------