diff --git a/tests/tools/test_browser_console.py b/tests/tools/test_browser_console.py
index 1b9bb462b..b058fb3f3 100644
--- a/tests/tools/test_browser_console.py
+++ b/tests/tools/test_browser_console.py
@@ -3,6 +3,7 @@
 import json
 import os
 import sys
+from pathlib import Path
 from unittest.mock import patch, MagicMock
 
 import pytest
@@ -194,6 +195,83 @@ class TestBrowserVisionAnnotate:
         assert "--annotate" in cmd_args
 
 
+class TestBrowserVisionConfig:
+    """browser_vision honours auxiliary.vision.* settings from config."""
+
+    def _setup_screenshot(self, tmp_path):
+        """Create a stub PNG in a fresh screenshots directory under tmp_path."""
+        shots_dir = tmp_path / "browser_screenshots"
+        shots_dir.mkdir()
+        screenshot = shots_dir / "shot.png"
+        screenshot.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8)
+        return shots_dir, screenshot
+
+    def _run_browser_vision(self, tmp_path, vision_cfg, analysis):
+        """Call browser_vision with ``vision_cfg`` as the auxiliary.vision config.
+
+        Returns the decoded JSON result and the kwargs of the last call_llm call.
+        """
+        from tools.browser_tool import browser_vision
+
+        shots_dir, screenshot = self._setup_screenshot(tmp_path)
+        mock_response = MagicMock()
+        mock_choice = MagicMock()
+        mock_choice.message.content = analysis
+        mock_response.choices = [mock_choice]
+
+        with (
+            patch("hermes_constants.get_hermes_dir", return_value=shots_dir),
+            patch("tools.browser_tool._cleanup_old_screenshots"),
+            patch("tools.browser_tool._run_browser_command", return_value={"success": True, "data": {"path": str(screenshot)}}),
+            patch("tools.browser_tool._get_vision_model", return_value="test-model"),
+            patch("hermes_cli.config.load_config", return_value={"auxiliary": {"vision": vision_cfg}}),
+            patch("tools.browser_tool.call_llm", return_value=mock_response) as mock_llm,
+        ):
+            result = json.loads(browser_vision("what is on the page?", task_id="test"))
+
+        assert mock_llm.called, "browser_vision never reached call_llm"
+        return result, mock_llm.call_args.kwargs
+
+    def test_browser_vision_uses_configured_temperature_and_timeout(self, tmp_path):
+        """Configured values reach call_llm, coerced to float."""
+        result, llm_kwargs = self._run_browser_vision(
+            tmp_path,
+            {"temperature": 1, "timeout": 45},
+            "Annotated screenshot analysis",
+        )
+
+        assert result["success"] is True
+        assert result["analysis"] == "Annotated screenshot analysis"
+        assert llm_kwargs["temperature"] == 1.0
+        assert llm_kwargs["timeout"] == 45.0
+        # 1 == 1.0 in Python, so the coercion has to be checked explicitly.
+        assert isinstance(llm_kwargs["temperature"], float)
+        assert isinstance(llm_kwargs["timeout"], float)
+
+    def test_browser_vision_defaults_temperature_when_config_omits_it(self, tmp_path):
+        """Missing keys fall back to the built-in defaults."""
+        result, llm_kwargs = self._run_browser_vision(
+            tmp_path, {}, "Default screenshot analysis"
+        )
+
+        assert result["success"] is True
+        assert result["analysis"] == "Default screenshot analysis"
+        assert llm_kwargs["temperature"] == 0.1
+        assert llm_kwargs["timeout"] == 120.0
+
+    def test_browser_vision_keeps_temperature_when_timeout_is_invalid(self, tmp_path):
+        """A malformed timeout must not discard a valid temperature."""
+        result, llm_kwargs = self._run_browser_vision(
+            tmp_path,
+            {"temperature": 0.7, "timeout": "not-a-number"},
+            "Fallback screenshot analysis",
+        )
+
+        assert result["success"] is True
+        assert llm_kwargs["temperature"] == 0.7
+        assert llm_kwargs["timeout"] == 120.0
+
+
 # ── auto-recording config ────────────────────────────────────────────
 
 
diff --git a/tools/browser_tool.py b/tools/browser_tool.py
index f8a3ff09a..96dbd7432 100644
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -2098,16 +2098,29 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
 
         logger.debug("browser_vision: analysing screenshot (%d bytes)", len(_screenshot_bytes))
 
-        # Read vision timeout from config (auxiliary.vision.timeout), default 120s.
+        # Read vision timeout/temperature from config (auxiliary.vision.*).
         # Local vision models (llama.cpp, ollama) can take well over 30s for
-        # screenshot analysis, so the default must be generous.
+        # screenshot analysis, so the default timeout must be generous.
         vision_timeout = 120.0
+        vision_temperature = 0.1
         try:
             from hermes_cli.config import load_config
             _cfg = load_config()
-            _vt = _cfg.get("auxiliary", {}).get("vision", {}).get("timeout")
-            if _vt is not None:
-                vision_timeout = float(_vt)
+            _vision_cfg = (_cfg.get("auxiliary") or {}).get("vision") or {}
+            # Convert each setting on its own so that one malformed value
+            # cannot discard the other; a bad value keeps its default.
+            _vt = _vision_cfg.get("timeout")
+            if _vt is not None:
+                try:
+                    vision_timeout = float(_vt)
+                except (TypeError, ValueError, OverflowError):
+                    logger.debug("browser_vision: ignoring invalid vision timeout %r", _vt)
+            _vtemp = _vision_cfg.get("temperature")
+            if _vtemp is not None:
+                try:
+                    vision_temperature = float(_vtemp)
+                except (TypeError, ValueError, OverflowError):
+                    logger.debug("browser_vision: ignoring invalid vision temperature %r", _vtemp)
         except Exception:
             pass
 
@@ -2123,7 +2136,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
                 }
             ],
             "max_tokens": 2000,
-            "temperature": 0.1,
+            "temperature": vision_temperature,
             "timeout": vision_timeout,
         }
         if vision_model: