From 7583aedacd53442d33d95acbed40c884ff90523d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 5 Jun 2026 06:43:51 -0700 Subject: [PATCH] fix(completion): remove /model autocomplete from CLI/TUI (#39727) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: respect disabled auto-compaction on context overflow Port from anomalyco/opencode#30749. When compression.enabled is false, NO automatic compaction trigger may fire. The proactive token-threshold paths (preflight + post-response should_compress gate) already honoured the setting, but the three provider-overflow recovery paths in the agent loop — long-context-tier 429, 413 payload-too-large, and context-overflow — called _compress_context() unconditionally, silently compressing and rotating the session against the user's explicit choice. Add a single guard at the top of the overflow-recovery dispatch: when compression is disabled and the error is one of those three overflow classes, surface a terminal error (compaction_disabled: True) telling the user to /compress manually, /new, switch to a larger-context model, or reduce attachments. Manual /compress (force=True) is unaffected — it never enters this loop. Tests: new TestOverflowWithCompactionDisabled (413 + 400 overflow don't compress when disabled; control case still compresses when enabled). Existing overflow-recovery tests updated to enable compaction explicitly (they verify the recovery fires); fixture defaults flipped to True to match production (compression.enabled defaults to True). * fix(completion): remove /model autocomplete from CLI/TUI The TUI frontend already suppressed /model argument completion in favor of the two-step ModelPicker (useCompletion.ts), but the CLI prompt_toolkit completer and the gateway-backed complete.slash RPC (TUI + desktop) still emitted model aliases and probed LM Studio on every keystroke. Drops the /model branch in SlashCommandCompleter.get_completions, the _model_completions method, and the LM Studio probe/cache helper that only fed it. Command-name completion (/mod -> model) and sibling arg completers (/skin, /personality) are untouched. Removes the now-dead TestModelTabCompletion tests. --- hermes_cli/commands.py | 86 +--------------------- tests/hermes_cli/test_ollama_cloud_auth.py | 66 ----------------- 2 files changed, 1 insertion(+), 151 deletions(-) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 34e3af4014b..f022ef32b07 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -1148,41 +1148,6 @@ def slack_subcommand_map() -> dict[str, str]: # --------------------------------------------------------------------------- -# Per-process cache for /model LM Studio autocomplete. Probing on -# every keystroke would block the UI; a short TTL keeps it live without -# hammering the server. -_LMSTUDIO_COMPLETION_CACHE: tuple[float, list[str]] | None = None - - -def _lmstudio_completion_models() -> list[str]: - """Locally-loaded LM Studio models for /model autocomplete (cached, gated).""" - global _LMSTUDIO_COMPLETION_CACHE - # Gate: don't probe 127.0.0.1 on every keystroke for users who don't use LM Studio. - if not (os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL")): - try: - from hermes_cli.auth import _load_auth_store - store = _load_auth_store() or {} - if "lmstudio" not in (store.get("providers") or {}) \ - and "lmstudio" not in (store.get("credential_pool") or {}): - return [] - except Exception: - return [] - now = time.time() - if _LMSTUDIO_COMPLETION_CACHE and (now - _LMSTUDIO_COMPLETION_CACHE[0]) < 30.0: - return _LMSTUDIO_COMPLETION_CACHE[1] - try: - from hermes_cli.models import fetch_lmstudio_models - models = fetch_lmstudio_models( - api_key=os.environ.get("LM_API_KEY", ""), - base_url=os.environ.get("LM_BASE_URL") or "http://127.0.0.1:1234/v1", - timeout=0.8, - ) - except Exception: - models = [] - _LMSTUDIO_COMPLETION_CACHE = (now, models) - return models - - class SlashCommandCompleter(Completer): """Autocomplete for built-in slash commands, subcommands, and skill commands.""" @@ -1599,52 +1564,6 @@ class SlashCommandCompleter(Completer): except Exception: pass - def _model_completions(self, sub_text: str, sub_lower: str): - """Yield completions for /model from config aliases + built-in aliases.""" - seen = set() - # Config-based direct aliases (preferred — include provider info) - try: - from hermes_cli.model_switch import ( - _ensure_direct_aliases, DIRECT_ALIASES, MODEL_ALIASES, - ) - _ensure_direct_aliases() - for name, da in DIRECT_ALIASES.items(): - if name.startswith(sub_lower) and name != sub_lower: - seen.add(name) - yield Completion( - name, - start_position=-len(sub_text), - display=name, - display_meta=f"{da.model} ({da.provider})", - ) - # Built-in catalog aliases not already covered - for name in sorted(MODEL_ALIASES.keys()): - if name in seen: - continue - if name.startswith(sub_lower) and name != sub_lower: - identity = MODEL_ALIASES[name] - yield Completion( - name, - start_position=-len(sub_text), - display=name, - display_meta=f"{identity.vendor}/{identity.family}", - ) - except Exception: - pass - # LM Studio: surface locally-loaded models. Gated on the user actually - # having LM Studio configured (env var or auth-store entry) so we - # don't probe 127.0.0.1 on every keystroke for users who don't use it. - for name in _lmstudio_completion_models(): - if name in seen: - continue - if name.startswith(sub_lower) and name != sub_lower: - yield Completion( - name, - start_position=-len(sub_text), - display=name, - display_meta="LM Studio", - ) - def get_completions(self, document, complete_event): text = document.text_before_cursor if not text.startswith("/"): @@ -1668,9 +1587,6 @@ class SlashCommandCompleter(Completer): # Dynamic completions for commands with runtime lists if " " not in sub_text: - if base_cmd == "/model": - yield from self._model_completions(sub_text, sub_lower) - return if base_cmd == "/skin": yield from self._skin_completions(sub_text, sub_lower) return @@ -1788,7 +1704,7 @@ class SlashCommandAutoSuggest(AutoSuggest): return Suggestion(cmd_name[len(word):]) return None - # Command is complete — suggest subcommands or model names + # Command is complete — suggest subcommands sub_text = parts[1] if len(parts) > 1 else "" sub_lower = sub_text.lower() diff --git a/tests/hermes_cli/test_ollama_cloud_auth.py b/tests/hermes_cli/test_ollama_cloud_auth.py index 7e2dc5ff078..77b19c9bb1a 100644 --- a/tests/hermes_cli/test_ollama_cloud_auth.py +++ b/tests/hermes_cli/test_ollama_cloud_auth.py @@ -178,72 +178,6 @@ class TestModelSwitchPersistence: assert result.base_url == "https://api.anthropic.com" -# --------------------------------------------------------------------------- -# /model tab completion -# --------------------------------------------------------------------------- - -class TestModelTabCompletion: - """SlashCommandCompleter provides model alias completions for /model.""" - - def test_model_completions_yields_direct_aliases(self, monkeypatch): - """_model_completions yields direct aliases with model and provider info.""" - from hermes_cli.commands import SlashCommandCompleter - from hermes_cli.model_switch import DirectAlias - import hermes_cli.model_switch as ms - - test_aliases = { - "opus": DirectAlias("claude-opus-4-6", "anthropic", ""), - "qwen": DirectAlias("qwen3.5:397b", "custom", "https://ollama.com/v1"), - } - monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases) - - completer = SlashCommandCompleter() - completions = list(completer._model_completions("", "")) - - names = [c.text for c in completions] - assert "opus" in names - assert "qwen" in names - - def test_model_completions_filters_by_prefix(self, monkeypatch): - """Completions filter by typed prefix.""" - from hermes_cli.commands import SlashCommandCompleter - from hermes_cli.model_switch import DirectAlias - import hermes_cli.model_switch as ms - - test_aliases = { - "opus": DirectAlias("claude-opus-4-6", "anthropic", ""), - "qwen": DirectAlias("qwen3.5:397b", "custom", "https://ollama.com/v1"), - } - monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases) - - completer = SlashCommandCompleter() - completions = list(completer._model_completions("o", "o")) - - names = [c.text for c in completions] - assert "opus" in names - assert "qwen" not in names - - def test_model_completions_shows_metadata(self, monkeypatch): - """Completions include model name and provider in display_meta.""" - from hermes_cli.commands import SlashCommandCompleter - from hermes_cli.model_switch import DirectAlias - import hermes_cli.model_switch as ms - - test_aliases = { - "glm": DirectAlias("glm-4.7", "custom", "https://ollama.com/v1"), - } - monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases) - - completer = SlashCommandCompleter() - completions = list(completer._model_completions("g", "g")) - - assert len(completions) >= 1 - glm_comp = [c for c in completions if c.text == "glm"][0] - meta_str = str(glm_comp.display_meta) - assert "glm-4.7" in meta_str - assert "custom" in meta_str - - # --------------------------------------------------------------------------- # Fallback base_url passthrough # ---------------------------------------------------------------------------