diff --git a/agent/secret_sources/bitwarden.py b/agent/secret_sources/bitwarden.py
index 8c1e8dc5678..235a4222594 100644
--- a/agent/secret_sources/bitwarden.py
+++ b/agent/secret_sources/bitwarden.py
@@ -73,6 +73,102 @@ _BWS_RUN_TIMEOUT = 30
 _CacheKey = Tuple[str, str, str]  # (access_token_fingerprint, project_id, server_url)
 _CACHE: Dict[_CacheKey, "_CachedFetch"] = {}
 
+# Disk-persisted cache so back-to-back CLI invocations (e.g. `hermes chat -q ...`
+# called from scripts, cron, the gateway forking new agents) don't each pay the
+# ~380ms `bws secret list` tax. The in-process _CACHE above only saves repeated
+# fetches WITHIN one process; this saves repeated fetches ACROSS processes.
+#
+# Layout: one JSON object per cache key, written atomically with mode 0600 in
+# <hermes_home>/cache/bws_cache.json. The file holds only the secret VALUES,
+# never the access token. It's plaintext-equivalent to ~/.hermes/.env (which
+# we already accept) but kept out of the .env file so users editing it won't
+# accidentally commit BSM-sourced secrets.
+_DISK_CACHE_BASENAME = "bws_cache.json"
+
+
+def _disk_cache_path(home_path: Optional[Path] = None) -> Path:
+    """Return the disk cache path under hermes_home/cache/.
+
+    `home_path` is what `load_hermes_dotenv()` already resolved; falling back
+    to `$HERMES_HOME` / `~/.hermes` keeps direct callers working too.
+    """
+    if home_path is None:
+        home_path = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
+    return home_path / "cache" / _DISK_CACHE_BASENAME
+
+
+def _cache_key_str(cache_key: _CacheKey) -> str:
+    """Serialize a cache key to a stable string for JSON storage."""
+    token_fp, project_id, server_url = cache_key
+    return f"{token_fp}|{project_id}|{server_url}"
+
+
+def _read_disk_cache(cache_key: _CacheKey, ttl_seconds: float,
+                     home_path: Optional[Path] = None) -> Optional["_CachedFetch"]:
+    """Return a cached entry from disk if fresh, else None.
+
+    Best-effort: any I/O or parse error returns None and we re-fetch.
+    """
+    if ttl_seconds <= 0:
+        return None
+    path = _disk_cache_path(home_path)
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            payload = json.load(f)
+    except (OSError, json.JSONDecodeError):
+        return None
+    if not isinstance(payload, dict):
+        return None
+    if payload.get("key") != _cache_key_str(cache_key):
+        return None
+    secrets = payload.get("secrets")
+    fetched_at = payload.get("fetched_at")
+    if not isinstance(secrets, dict) or not isinstance(fetched_at, (int, float)):
+        return None
+    # Coerce all values to strings — JSON allows numbers but env vars need strings
+    typed_secrets: Dict[str, str] = {
+        k: v for k, v in secrets.items() if isinstance(k, str) and isinstance(v, str)
+    }
+    entry = _CachedFetch(secrets=typed_secrets, fetched_at=float(fetched_at))
+    if not entry.is_fresh(ttl_seconds):
+        return None
+    return entry
+
+
+def _write_disk_cache(cache_key: _CacheKey, entry: "_CachedFetch",
+                      home_path: Optional[Path] = None) -> None:
+    """Persist a cache entry to disk atomically with mode 0600.
+
+    Best-effort: any I/O error is swallowed (the next invocation will just
+    re-fetch). We never want disk cache failures to break startup.
+    """
+    path = _disk_cache_path(home_path)
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        payload = {
+            "key": _cache_key_str(cache_key),
+            "secrets": entry.secrets,
+            "fetched_at": entry.fetched_at,
+        }
+        # Write to a temp file in the same directory and atomic-rename.
+        # tempfile honors os.umask, so we explicitly chmod 0600 before rename.
+        fd, tmp = tempfile.mkstemp(
+            prefix=".bws_cache_", suffix=".tmp", dir=str(path.parent)
+        )
+        try:
+            with os.fdopen(fd, "w", encoding="utf-8") as f:
+                json.dump(payload, f)
+            os.chmod(tmp, 0o600)
+            os.replace(tmp, path)
+        except BaseException:
+            try:
+                os.unlink(tmp)
+            except OSError:
+                pass
+            raise
+    except OSError:
+        pass  # best-effort — disk cache miss on next invocation is fine
+
 
 @dataclass
 class _CachedFetch:
@@ -318,6 +414,7 @@ def fetch_bitwarden_secrets(
     cache_ttl_seconds: float = 300,
     use_cache: bool = True,
     server_url: str = "",
+    home_path: Optional[Path] = None,
 ) -> Tuple[Dict[str, str], List[str]]:
     """Pull the secrets for ``project_id`` from Bitwarden Secrets Manager.
 
@@ -329,6 +426,13 @@ def fetch_bitwarden_secrets(
     (``https://vault.bitwarden.com``, US Cloud).  This is plumbed into
     the subprocess as ``BWS_SERVER_URL``.
 
+    Caching is a two-layer LRU: an in-process dict (for hot-reload paths
+    inside one process) and a disk-persisted JSON file under
+    ``<hermes_home>/cache/bws_cache.json`` (for back-to-back CLI invocations).
+    Both share the same TTL.  Pass ``home_path`` so disk cache lookups find
+    the right directory in tests / non-standard installs; otherwise we fall
+    back to ``$HERMES_HOME`` / ``~/.hermes``.
+
     Raises :class:`RuntimeError` for fatal conditions (missing binary,
     auth failure, unparseable output).  Callers in the env_loader path
     catch this and emit a single warning; callers in the user-facing
@@ -344,6 +448,13 @@ def fetch_bitwarden_secrets(
         cached = _CACHE.get(cache_key)
         if cached and cached.is_fresh(cache_ttl_seconds):
             return cached.secrets, []
+        # L2: disk cache. ~5ms on cache hit vs ~380ms for `bws secret list`.
+        disk_cached = _read_disk_cache(cache_key, cache_ttl_seconds, home_path)
+        if disk_cached is not None:
+            # Promote into in-process cache so subsequent fetches in the
+            # same process skip the disk read too.
+            _CACHE[cache_key] = disk_cached
+            return disk_cached.secrets, []
 
     bws = binary or find_bws(install_if_missing=True)
     if bws is None:
@@ -355,7 +466,10 @@ def fetch_bitwarden_secrets(
         )
 
     secrets, warnings = _run_bws_list(bws, access_token, project_id, server_url)
-    _CACHE[cache_key] = _CachedFetch(secrets=secrets, fetched_at=time.time())
+    entry = _CachedFetch(secrets=secrets, fetched_at=time.time())
+    _CACHE[cache_key] = entry
+    if use_cache:
+        _write_disk_cache(cache_key, entry, home_path)
     return secrets, warnings
 
 
@@ -452,6 +566,7 @@ def apply_bitwarden_secrets(
     cache_ttl_seconds: float = 300,
     auto_install: bool = True,
     server_url: str = "",
+    home_path: Optional[Path] = None,
 ) -> FetchResult:
     """Pull secrets from BSM and set them on ``os.environ``.
 
@@ -502,6 +617,7 @@ def apply_bitwarden_secrets(
             binary=binary,
             cache_ttl_seconds=cache_ttl_seconds,
             server_url=server_url,
+            home_path=home_path,
         )
     except RuntimeError as exc:
         result.error = str(exc)
@@ -531,5 +647,15 @@ def apply_bitwarden_secrets(
 # ---------------------------------------------------------------------------
 
 
-def _reset_cache_for_tests() -> None:
+def _reset_cache_for_tests(home_path: Optional[Path] = None) -> None:
+    """Clear in-process AND disk caches.
+
+    Tests can pass ``home_path`` to scope the disk cleanup to a tmpdir.
+    Without it we fall back to the same default resolution as the cache
+    writer itself.
+    """
     _CACHE.clear()
+    try:
+        _disk_cache_path(home_path).unlink()
+    except (FileNotFoundError, OSError):
+        pass
diff --git a/hermes_cli/env_loader.py b/hermes_cli/env_loader.py
index 447595c56d7..efea17b7360 100644
--- a/hermes_cli/env_loader.py
+++ b/hermes_cli/env_loader.py
@@ -255,6 +255,7 @@ def _apply_external_secret_sources(home_path: Path) -> None:
         cache_ttl_seconds=float(bw_cfg.get("cache_ttl_seconds", 300)),
         auto_install=bool(bw_cfg.get("auto_install", True)),
         server_url=str(bw_cfg.get("server_url", "") or "").strip(),
+        home_path=home_path,
     )
 
     if result.applied:
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index eb672cabc8e..1ba5d7082a6 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -280,20 +280,29 @@ load_hermes_dotenv(project_env=PROJECT_ROOT / ".env")
 # module-import time). Without this, config.yaml's toggle is ignored because
 # the setup_logging() call below imports agent.redact, which reads the env var
 # exactly once. Env var in .env still wins — this is config.yaml fallback only.
+#
+# We also read network.force_ipv4 from the same yaml load to avoid two
+# separate config.yaml reads (saves ~17ms on every CLI startup — the second
+# `load_config()` was doing a full deep-merge for one boolean lookup).
+_FORCE_IPV4_EARLY = False
 try:
-    if "HERMES_REDACT_SECRETS" not in os.environ:
-        import yaml as _yaml_early
+    import yaml as _yaml_early
 
-        _cfg_path = get_hermes_home() / "config.yaml"
-        if _cfg_path.exists():
-            with open(_cfg_path, encoding="utf-8") as _f:
-                _early_sec_cfg = (_yaml_early.safe_load(_f) or {}).get("security", {})
+    _cfg_path = get_hermes_home() / "config.yaml"
+    if _cfg_path.exists():
+        with open(_cfg_path, encoding="utf-8") as _f:
+            _early_cfg_raw = _yaml_early.safe_load(_f) or {}
+        if "HERMES_REDACT_SECRETS" not in os.environ:
+            _early_sec_cfg = _early_cfg_raw.get("security", {})
             if isinstance(_early_sec_cfg, dict):
                 _early_redact = _early_sec_cfg.get("redact_secrets")
                 if _early_redact is not None:
                     os.environ["HERMES_REDACT_SECRETS"] = str(_early_redact).lower()
-            del _early_sec_cfg
-        del _cfg_path
+        _early_net_cfg = _early_cfg_raw.get("network", {})
+        if isinstance(_early_net_cfg, dict) and _early_net_cfg.get("force_ipv4"):
+            _FORCE_IPV4_EARLY = True
+        del _early_cfg_raw
+    del _cfg_path
 except Exception:
     pass  # best-effort — redaction stays at default (enabled) on config errors
 
@@ -307,17 +316,15 @@ except Exception:
     pass  # best-effort — don't crash the CLI if logging setup fails
 
 # Apply IPv4 preference early, before any HTTP clients are created.
-try:
-    from hermes_cli.config import load_config as _load_config_early
-    from hermes_constants import apply_ipv4_preference as _apply_ipv4
+# We already determined whether to force IPv4 from the raw yaml read above —
+# this just calls the toggle without a redundant load_config() round trip.
+if _FORCE_IPV4_EARLY:
+    try:
+        from hermes_constants import apply_ipv4_preference as _apply_ipv4
 
-    _early_cfg = _load_config_early()
-    _net = _early_cfg.get("network", {})
-    if isinstance(_net, dict) and _net.get("force_ipv4"):
         _apply_ipv4(force=True)
-    del _early_cfg, _net
-except Exception:
-    pass  # best-effort — don't crash if config isn't available yet
+    except Exception:
+        pass  # best-effort — don't crash if hermes_constants not importable yet
 
 import logging
 import threading
@@ -4551,11 +4558,27 @@ def _model_flow_named_custom(config, provider_info):
     print(f"   Provider: {name} ({base_url})")
 
 
-# Keep the historical eager model catalog import on desktop/CI. Termux defers
-# it to the model-selection handlers so plain `hermes --tui` does not pay for
-# requests/models.dev catalog imports before the Node TUI starts.
-if not _is_termux_startup_environment():
-    from hermes_cli.models import _PROVIDER_MODELS
+# Lazy-export the model catalog at module level. Tests and a handful of
+# downstream call sites read `hermes_cli.main._PROVIDER_MODELS` directly,
+# so the symbol needs to be reachable as a module attribute. But importing
+# the catalog eagerly costs ~55ms on every `hermes` invocation — including
+# fast paths like `hermes --version` and slash-command dispatch that never
+# touch the catalog. PEP 562 module-level __getattr__ defers the import
+# until first attribute access, so the cost is only paid by callers that
+# actually look up the catalog. Termux already defers via the same
+# mechanism (its model-selection handlers do their own function-local
+# imports), so the explicit termux branch from before is no longer needed.
+_LAZY_MODEL_EXPORTS = ("_PROVIDER_MODELS",)
+
+
+def __getattr__(name):
+    """Defer the model-catalog import until something actually reads it."""
+    if name in _LAZY_MODEL_EXPORTS:
+        from hermes_cli.models import _PROVIDER_MODELS
+        # Cache on the module so subsequent accesses skip the import machinery.
+        globals()[name] = _PROVIDER_MODELS
+        return _PROVIDER_MODELS
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 
 def _current_reasoning_effort(config) -> str:
diff --git a/tests/test_bitwarden_secrets.py b/tests/test_bitwarden_secrets.py
index 125fbcdc49e..3938585469f 100644
--- a/tests/test_bitwarden_secrets.py
+++ b/tests/test_bitwarden_secrets.py
@@ -572,3 +572,224 @@ def test_env_loader_calls_bsm_when_enabled(tmp_path, monkeypatch):
 
     assert called["n"] == 1
     assert os.environ.get("MY_BSM_KEY") == "from-bsm"
+
+
+# ---------------------------------------------------------------------------
+# Disk-persisted cache (cross-process — speeds up back-to-back CLI invocations)
+# ---------------------------------------------------------------------------
+
+
+def test_disk_cache_written_after_first_fetch(monkeypatch, tmp_path):
+    """First fetch hits bws AND writes a 0600 file under hermes_home/cache/."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    fake_binary = tmp_path / "bws"
+    fake_binary.write_text("")
+    payload = _fake_bws_payload([{"key": "K1", "value": "v1"}])
+
+    call_count = {"n": 0}
+    def fake_run(*a, **kw):
+        call_count["n"] += 1
+        return mock.Mock(returncode=0, stdout=payload, stderr="")
+    monkeypatch.setattr(bw.subprocess, "run", fake_run)
+    bw._reset_cache_for_tests(home)
+
+    secrets, _ = bw.fetch_bitwarden_secrets(
+        access_token="0.t", project_id="proj-1", binary=fake_binary,
+        cache_ttl_seconds=300, home_path=home,
+    )
+    assert secrets == {"K1": "v1"}
+    assert call_count["n"] == 1
+
+    cache_path = bw._disk_cache_path(home)
+    assert cache_path.exists()
+    # Mode must be 0600 — disk cache contains plaintext secret values
+    mode = os.stat(cache_path).st_mode & 0o777
+    assert mode == 0o600, f"expected 0o600, got 0o{mode:o}"
+
+    # File contents: key (fingerprint not raw token), secrets dict, fetched_at
+    payload_disk = json.loads(cache_path.read_text())
+    assert set(payload_disk.keys()) == {"key", "secrets", "fetched_at"}
+    assert payload_disk["secrets"] == {"K1": "v1"}
+    # Critically, the raw access token must NOT appear anywhere in the file
+    assert "0.t" not in cache_path.read_text()
+
+
+def test_disk_cache_short_circuits_bws_when_fresh(monkeypatch, tmp_path):
+    """Second fetch (different process simulation) skips bws entirely."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    fake_binary = tmp_path / "bws"
+    fake_binary.write_text("")
+    payload = _fake_bws_payload([{"key": "K1", "value": "v1"}])
+
+    call_count = {"n": 0}
+    def fake_run(*a, **kw):
+        call_count["n"] += 1
+        return mock.Mock(returncode=0, stdout=payload, stderr="")
+    monkeypatch.setattr(bw.subprocess, "run", fake_run)
+    bw._reset_cache_for_tests(home)
+
+    # First call: hits bws, populates disk cache
+    bw.fetch_bitwarden_secrets(
+        access_token="0.t", project_id="proj-1", binary=fake_binary,
+        cache_ttl_seconds=300, home_path=home,
+    )
+    assert call_count["n"] == 1
+
+    # Clear ONLY the in-process cache to simulate a fresh subprocess.
+    bw._CACHE.clear()
+
+    secrets2, _ = bw.fetch_bitwarden_secrets(
+        access_token="0.t", project_id="proj-1", binary=fake_binary,
+        cache_ttl_seconds=300, home_path=home,
+    )
+    assert secrets2 == {"K1": "v1"}
+    # Critical: bws was NOT invoked the second time
+    assert call_count["n"] == 1
+
+
+def test_disk_cache_expires_with_ttl(monkeypatch, tmp_path):
+    """Stale disk cache (older than ttl) triggers a refetch."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    fake_binary = tmp_path / "bws"
+    fake_binary.write_text("")
+    payload = _fake_bws_payload([{"key": "K1", "value": "v1"}])
+
+    call_count = {"n": 0}
+    def fake_run(*a, **kw):
+        call_count["n"] += 1
+        return mock.Mock(returncode=0, stdout=payload, stderr="")
+    monkeypatch.setattr(bw.subprocess, "run", fake_run)
+    bw._reset_cache_for_tests(home)
+
+    # First call
+    bw.fetch_bitwarden_secrets(
+        access_token="0.t", project_id="proj-1", binary=fake_binary,
+        cache_ttl_seconds=300, home_path=home,
+    )
+    assert call_count["n"] == 1
+
+    # Backdate the disk cache so the TTL window has passed
+    cache_path = bw._disk_cache_path(home)
+    payload_disk = json.loads(cache_path.read_text())
+    payload_disk["fetched_at"] = time.time() - 10_000
+    cache_path.write_text(json.dumps(payload_disk))
+    bw._CACHE.clear()
+
+    # Second call: stale disk → refetch
+    bw.fetch_bitwarden_secrets(
+        access_token="0.t", project_id="proj-1", binary=fake_binary,
+        cache_ttl_seconds=300, home_path=home,
+    )
+    assert call_count["n"] == 2
+
+
+def test_disk_cache_key_mismatch_triggers_refetch(monkeypatch, tmp_path):
+    """Disk cache entry written by a different token/project is ignored."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    fake_binary = tmp_path / "bws"
+    fake_binary.write_text("")
+    payload = _fake_bws_payload([{"key": "K1", "value": "v1"}])
+
+    call_count = {"n": 0}
+    def fake_run(*a, **kw):
+        call_count["n"] += 1
+        return mock.Mock(returncode=0, stdout=payload, stderr="")
+    monkeypatch.setattr(bw.subprocess, "run", fake_run)
+    bw._reset_cache_for_tests(home)
+
+    # Write a cache entry for a DIFFERENT token/project pair
+    cache_path = bw._disk_cache_path(home)
+    cache_path.parent.mkdir(parents=True, exist_ok=True)
+    cache_path.write_text(json.dumps({
+        "key": "deadbeef00000000|other-project|",
+        "secrets": {"OTHER": "should-not-leak"},
+        "fetched_at": time.time(),
+    }))
+
+    secrets, _ = bw.fetch_bitwarden_secrets(
+        access_token="0.t", project_id="proj-1", binary=fake_binary,
+        cache_ttl_seconds=300, home_path=home,
+    )
+    # We must NOT have used the foreign cache entry
+    assert secrets == {"K1": "v1"}
+    assert "OTHER" not in secrets
+    assert call_count["n"] == 1
+
+
+def test_disk_cache_use_cache_false_skips_disk(monkeypatch, tmp_path):
+    """use_cache=False must skip BOTH in-process and disk caches."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    fake_binary = tmp_path / "bws"
+    fake_binary.write_text("")
+    payload = _fake_bws_payload([{"key": "K1", "value": "v1"}])
+
+    call_count = {"n": 0}
+    def fake_run(*a, **kw):
+        call_count["n"] += 1
+        return mock.Mock(returncode=0, stdout=payload, stderr="")
+    monkeypatch.setattr(bw.subprocess, "run", fake_run)
+    bw._reset_cache_for_tests(home)
+
+    # First call WITH cache populates disk
+    bw.fetch_bitwarden_secrets(
+        access_token="0.t", project_id="proj-1", binary=fake_binary,
+        cache_ttl_seconds=300, use_cache=True, home_path=home,
+    )
+    assert call_count["n"] == 1
+    bw._CACHE.clear()
+
+    # Second call with use_cache=False MUST hit bws again even though disk is fresh
+    bw.fetch_bitwarden_secrets(
+        access_token="0.t", project_id="proj-1", binary=fake_binary,
+        cache_ttl_seconds=300, use_cache=False, home_path=home,
+    )
+    assert call_count["n"] == 2
+
+
+def test_disk_cache_corrupt_file_falls_through(monkeypatch, tmp_path):
+    """A garbage cache file must NOT crash startup — we refetch."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    fake_binary = tmp_path / "bws"
+    fake_binary.write_text("")
+    payload = _fake_bws_payload([{"key": "K1", "value": "v1"}])
+
+    monkeypatch.setattr(
+        bw.subprocess, "run",
+        lambda *a, **kw: mock.Mock(returncode=0, stdout=payload, stderr=""),
+    )
+    bw._reset_cache_for_tests(home)
+
+    # Write a corrupt cache file
+    cache_path = bw._disk_cache_path(home)
+    cache_path.parent.mkdir(parents=True, exist_ok=True)
+    cache_path.write_text("not json {{{")
+
+    secrets, _ = bw.fetch_bitwarden_secrets(
+        access_token="0.t", project_id="proj-1", binary=fake_binary,
+        cache_ttl_seconds=300, home_path=home,
+    )
+    # Refetched cleanly
+    assert secrets == {"K1": "v1"}
+    # And the corrupt file was replaced with a valid one
+    assert json.loads(cache_path.read_text())["secrets"] == {"K1": "v1"}
+
+
+def test_reset_cache_for_tests_deletes_disk_file(tmp_path):
+    """_reset_cache_for_tests(home_path) must also clean disk."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    cache_path = bw._disk_cache_path(home)
+    cache_path.parent.mkdir(parents=True, exist_ok=True)
+    cache_path.write_text("{}")
+    assert cache_path.exists()
+
+    bw._reset_cache_for_tests(home)
+    assert not cache_path.exists()
+    # Idempotent
+    bw._reset_cache_for_tests(home)