From c1cc3d4ea65bf11dc493f9d81695bab0003b1aa9 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 9 May 2026 17:45:09 -0700 Subject: [PATCH] perf(image_gen): defer fal_client import to first generation request (#22859) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `tools/image_generation_tool.py` did `import fal_client` at module top, which pulled the entire fal_client + httpx + rich stack on every process that ran `discover_builtin_tools()` — every `hermes` cold start, even ones that never touch image generation. Make the import lazy: replace the eager import with a placeholder (`fal_client: Any = None`) and add an idempotent `_load_fal_client()` that rebinds the module global on first use. Call it from the two runtime entry points (`_ManagedFalSyncClient.__init__` and `_submit_fal_request`) and from the SDK-presence check in `check_image_generation_requirements`. The loader short-circuits if the global is already truthy, which preserves the test pattern of monkeypatching `fal_client` to install a mock — the `monkeypatch.setattr(image_tool, "fal_client", ...)` calls in test_image_generation.py keep working unchanged. Measured impact (15-run min times, 9950X3D): tools.image_generation_tool alone: 77 → 20 ms (-74%) 36 → 20 MB (-44%) import cli (full): 734 → 720 ms (-2%) import model_tools: 372 → 366 ms (-2%) The microbench is dramatic but the full-CLI win is small — fal_client shares its httpx + rich dependencies with the rest of the agent, so on a real cold start most of the 16 MB / 64 ms is already paid by other imports. The win matters mostly for processes that touch this tool without otherwise loading httpx (rare) and for architectural consistency with the previous lazy-load PRs (#22681 google_chat, #22831 teams). Tests: 55/55 `tests/tools/test_image_generation.py` pass, including the cases that monkeypatch the module global to install a mock fal_client. 
End-to-end verification confirms `import model_tools` no longer pulls `fal_client` into `sys.modules`. --- tools/image_generation_tool.py | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index c97d9e7b64a..68f4af9ac0c 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -29,7 +29,33 @@ import uuid from typing import Any, Dict, Optional, Union from urllib.parse import urlencode -import fal_client +# fal_client is imported lazily — see _load_fal_client(). Pulling it +# eagerly added ~64 ms to every CLI cold start because +# discover_builtin_tools() imports this module unconditionally during +# the registry walk, even when image generation is never used. +# +# Tests that monkeypatch this attribute (e.g. +# ``monkeypatch.setattr(image_tool, "fal_client", fake_fal_client)``) +# still work: _load_fal_client() short-circuits when the attribute is +# anything other than None, so a test-installed mock is not overwritten by a +# subsequent real import. +fal_client: Any = None + + +def _load_fal_client() -> Any: + """Lazily import fal_client and rebind the module global on first use. + + Idempotent. Returns the (now-loaded) ``fal_client`` module reference. + Skips the import if the global is already set (not None) — this preserves the + test pattern of monkeypatching the module global to install a mock. + """ + global fal_client + if fal_client is not None: + return fal_client + import fal_client as _fal_client # noqa: F811 — module-global rebind + fal_client = _fal_client + return fal_client + from tools.debug_helpers import DebugSession from tools.managed_tool_gateway import resolve_managed_tool_gateway @@ -338,6 +364,9 @@ class _ManagedFalSyncClient: """Small per-instance wrapper around fal_client.SyncClient for managed queue hosts.""" def __init__(self, *, key: str, queue_run_origin: str): + # Trigger the lazy import on first construction. 
Idempotent — the + # placeholder is overwritten with the real module on first call. + _load_fal_client() sync_client_class = getattr(fal_client, "SyncClient", None) if sync_client_class is None: raise RuntimeError("fal_client.SyncClient is required for managed FAL gateway mode") @@ -435,6 +464,8 @@ def _get_managed_fal_client(managed_gateway): def _submit_fal_request(model: str, arguments: Dict[str, Any]): """Submit a FAL request using direct credentials or the managed queue gateway.""" + # Trigger the lazy import on first call. Idempotent. + _load_fal_client() request_headers = {"x-idempotency-key": str(uuid.uuid4())} managed_gateway = _resolve_managed_fal_gateway() if managed_gateway is None: @@ -788,7 +819,11 @@ def check_image_generation_requirements() -> bool: """ try: if check_fal_api_key(): - fal_client # noqa: F401 — SDK presence check + # Trigger the lazy fal_client import here as the SDK presence + # check. Raises ImportError if the optional ``fal-client`` + # package isn't installed; the caller's except ImportError + # below catches that and continues to plugin probing. + _load_fal_client() return True except ImportError: pass