diff --git a/cli.py b/cli.py index 27f691dc2..3848a24d9 100644 --- a/cli.py +++ b/cli.py @@ -5974,10 +5974,16 @@ class HermesCLI: """Start capturing audio from the microphone.""" if getattr(self, '_should_exit', False): return - from tools.voice_mode import AudioRecorder, check_voice_requirements + from tools.voice_mode import create_audio_recorder, check_voice_requirements reqs = check_voice_requirements() if not reqs["audio_available"]: + if _is_termux_environment(): + raise RuntimeError( + "Voice mode requires either Termux:API microphone access or Python audio libraries.\n" + "Option 1: pkg install termux-api and install the Termux:API Android app\n" + "Option 2: pkg install python-numpy portaudio && python -m pip install sounddevice" + ) raise RuntimeError( "Voice mode requires sounddevice and numpy.\n" "Install with: pip install sounddevice numpy\n" @@ -6006,7 +6012,7 @@ class HermesCLI: pass if self._voice_recorder is None: - self._voice_recorder = AudioRecorder() + self._voice_recorder = create_audio_recorder() # Apply config-driven silence params self._voice_recorder._silence_threshold = voice_cfg.get("silence_threshold", 200) @@ -6035,7 +6041,13 @@ class HermesCLI: with self._voice_lock: self._voice_recording = False raise - _cprint(f"\n{_GOLD}● Recording...{_RST} {_DIM}(auto-stops on silence | Ctrl+B to stop & exit continuous){_RST}") + if getattr(self._voice_recorder, "supports_silence_autostop", True): + _recording_hint = "auto-stops on silence | Ctrl+B to stop & exit continuous" + elif _is_termux_environment(): + _recording_hint = "Termux:API capture | Ctrl+B to stop" + else: + _recording_hint = "Ctrl+B to stop" + _cprint(f"\n{_GOLD}● Recording...{_RST} {_DIM}({_recording_hint}){_RST}") # Periodically refresh prompt to update audio level indicator def _refresh_level(): @@ -6244,7 +6256,9 @@ class HermesCLI: _cprint(f" {_DIM}{line}{_RST}") if reqs["missing_packages"]: if _is_termux_environment(): - _cprint(f"\n {_BOLD}Install: pkg install python-numpy 
def _get_tui_prompt_fragments(self):
    """Build the prompt_toolkit fragment list for the active interactive state.

    State flags are consulted in priority order and the first truthy one wins;
    when none is set the plain prompt symbol is returned.  If
    ``_use_minimal_tui_chrome`` returns a truthy value for the current terminal
    width, the state suffix is dropped and only the icon (plus any extra text)
    is rendered.
    """
    symbol, state_suffix = self._get_tui_prompt_symbols()
    compact = self._use_minimal_tui_chrome(width=self._get_tui_terminal_width())

    def _render(style, icon, extra=""):
        # Compact form: "<icon>[ <extra>] "; full form appends the state suffix.
        if compact:
            label = f"{icon} {extra.strip()}".rstrip() if extra else icon
            return [(style, label + " ")]
        label = f"{icon} {extra} {state_suffix}" if extra else f"{icon} {state_suffix}"
        return [(style, label)]

    def _no_extra():
        return ""

    # (flag attribute, style class, icon factory, extra-text factory).
    # Factories keep the level bar and spinner lazy: they are only evaluated
    # for the state that is actually active.
    states = (
        ("_voice_recording", "class:voice-recording", lambda: "●", lambda: self._audio_level_bar()),
        ("_voice_processing", "class:voice-processing", lambda: "◉", _no_extra),
        ("_sudo_state", "class:sudo-prompt", lambda: "🔐", _no_extra),
        ("_secret_state", "class:sudo-prompt", lambda: "🔑", _no_extra),
        ("_approval_state", "class:prompt-working", lambda: "⚠", _no_extra),
        ("_clarify_freetext", "class:clarify-selected", lambda: "✎", _no_extra),
        ("_clarify_state", "class:prompt-working", lambda: "?", _no_extra),
        ("_command_running", "class:prompt-working", lambda: self._command_spinner_frame(), _no_extra),
        ("_agent_running", "class:prompt-working", lambda: "⚕", _no_extra),
        ("_voice_mode", "class:voice-prompt", lambda: "🎤", _no_extra),
    )
    for flag, style, make_icon, make_extra in states:
        if getattr(self, flag):
            extra = make_extra()
            return _render(style, make_icon(), extra)

    return [("class:prompt", symbol)]
+++ b/tests/tools/test_browser_homebrew_paths.py @@ -161,6 +161,20 @@ class TestBrowserRequirements: assert check_browser_requirements() is False +class TestRunBrowserCommandTermuxFallback: + def test_termux_local_mode_rejects_bare_npx_fallback(self, monkeypatch): + monkeypatch.setenv("TERMUX_VERSION", "0.118.3") + monkeypatch.setenv("PREFIX", "/data/data/com.termux/files/usr") + monkeypatch.setattr("tools.browser_tool._find_agent_browser", lambda: "npx agent-browser") + monkeypatch.setattr("tools.browser_tool._get_cloud_provider", lambda: None) + + result = _run_browser_command("task-1", "navigate", ["https://example.com"]) + + assert result["success"] is False + assert "bare npx fallback" in result["error"] + assert "agent-browser install" in result["error"] + + class TestRunBrowserCommandPathConstruction: """Verify _run_browser_command() includes Homebrew node dirs in subprocess PATH.""" diff --git a/tests/tools/test_voice_mode.py b/tests/tools/test_voice_mode.py index 3ad728914..6ff64702a 100644 --- a/tests/tools/test_voice_mode.py +++ b/tests/tools/test_voice_mode.py @@ -199,11 +199,42 @@ class TestDetectAudioEnvironment: assert any("python -m pip install sounddevice" in w for w in result["warnings"]) + def test_termux_api_microphone_allows_voice_without_sounddevice(self, monkeypatch): + monkeypatch.setenv("TERMUX_VERSION", "0.118.3") + monkeypatch.setenv("PREFIX", "/data/data/com.termux/files/usr") + monkeypatch.delenv("SSH_CLIENT", raising=False) + monkeypatch.delenv("SSH_TTY", raising=False) + monkeypatch.delenv("SSH_CONNECTION", raising=False) + monkeypatch.setattr("tools.voice_mode.shutil.which", lambda cmd: "/data/data/com.termux/files/usr/bin/termux-microphone-record" if cmd == "termux-microphone-record" else None) + monkeypatch.setattr("tools.voice_mode._import_audio", lambda: (_ for _ in ()).throw(ImportError("no audio libs"))) + + from tools.voice_mode import detect_audio_environment + result = detect_audio_environment() + + assert 
result["available"] is True + assert any("Termux:API microphone recording available" in n for n in result.get("notices", [])) + assert result["warnings"] == [] + + # ============================================================================ # check_voice_requirements # ============================================================================ class TestCheckVoiceRequirements: + def test_termux_api_capture_counts_as_audio_available(self, monkeypatch): + monkeypatch.setattr("tools.voice_mode._audio_available", lambda: False) + monkeypatch.setattr("tools.voice_mode._termux_microphone_command", lambda: "/data/data/com.termux/files/usr/bin/termux-microphone-record") + monkeypatch.setattr("tools.voice_mode.detect_audio_environment", lambda: {"available": True, "warnings": [], "notices": ["Termux:API microphone recording available"]}) + monkeypatch.setattr("tools.transcription_tools._get_provider", lambda cfg: "openai") + + from tools.voice_mode import check_voice_requirements + result = check_voice_requirements() + + assert result["available"] is True + assert result["audio_available"] is True + assert result["missing_packages"] == [] + assert "Termux:API microphone" in result["details"] + def test_all_requirements_met(self, monkeypatch): monkeypatch.setattr("tools.voice_mode._audio_available", lambda: True) monkeypatch.setattr("tools.voice_mode.detect_audio_environment", @@ -250,8 +281,71 @@ class TestCheckVoiceRequirements: # AudioRecorder # ============================================================================ -class TestAudioRecorderStart: - def test_start_raises_without_audio(self, monkeypatch): +class TestCreateAudioRecorder: + def test_termux_uses_termux_audio_recorder_when_api_present(self, monkeypatch): + monkeypatch.setenv("TERMUX_VERSION", "0.118.3") + monkeypatch.setenv("PREFIX", "/data/data/com.termux/files/usr") + monkeypatch.setattr("tools.voice_mode._termux_microphone_command", lambda: 
"/data/data/com.termux/files/usr/bin/termux-microphone-record") + + from tools.voice_mode import create_audio_recorder, TermuxAudioRecorder + recorder = create_audio_recorder() + + assert isinstance(recorder, TermuxAudioRecorder) + assert recorder.supports_silence_autostop is False + + +class TestTermuxAudioRecorder: + def test_start_and_stop_use_termux_microphone_commands(self, monkeypatch, temp_voice_dir): + command_calls = [] + output_path = Path(temp_voice_dir) / "recording_20260409_120000.aac" + + def fake_run(cmd, **kwargs): + command_calls.append(cmd) + if cmd[1] == "-f": + Path(cmd[2]).write_bytes(b"aac-bytes") + return MagicMock(returncode=0, stdout="", stderr="") + + monkeypatch.setenv("TERMUX_VERSION", "0.118.3") + monkeypatch.setenv("PREFIX", "/data/data/com.termux/files/usr") + monkeypatch.setattr("tools.voice_mode._termux_microphone_command", lambda: "/data/data/com.termux/files/usr/bin/termux-microphone-record") + monkeypatch.setattr("tools.voice_mode.time.strftime", lambda fmt: "20260409_120000") + monkeypatch.setattr("tools.voice_mode.subprocess.run", fake_run) + + from tools.voice_mode import TermuxAudioRecorder + recorder = TermuxAudioRecorder() + recorder.start() + recorder._start_time = time.monotonic() - 1.0 + result = recorder.stop() + + assert result == str(output_path) + assert command_calls[0][:2] == ["/data/data/com.termux/files/usr/bin/termux-microphone-record", "-f"] + assert command_calls[1] == ["/data/data/com.termux/files/usr/bin/termux-microphone-record", "-q"] + + def test_cancel_removes_partial_termux_recording(self, monkeypatch, temp_voice_dir): + output_path = Path(temp_voice_dir) / "recording_20260409_120000.aac" + + def fake_run(cmd, **kwargs): + if cmd[1] == "-f": + Path(cmd[2]).write_bytes(b"aac-bytes") + return MagicMock(returncode=0, stdout="", stderr="") + + monkeypatch.setenv("TERMUX_VERSION", "0.118.3") + monkeypatch.setenv("PREFIX", "/data/data/com.termux/files/usr") + 
def _requires_real_termux_browser_install(browser_cmd: str) -> bool:
    """Tell whether *browser_cmd* is only the bare npx fallback on local Termux.

    The environment and local-mode checks run first and short-circuit, so the
    command string is compared only when both of them hold.
    """
    on_local_termux = _is_termux_environment() and _is_local_mode()
    return on_local_termux and browser_cmd.strip() == "npx agent-browser"


def _termux_browser_install_error() -> str:
    """Compose the error shown when Termux local mode has only the npx fallback."""
    install_hint = _browser_install_hint()
    return (
        "Local browser automation on Termux cannot rely on the bare npx fallback. "
        f"Install agent-browser explicitly first: {install_hint}"
    )
def _termux_microphone_command() -> Optional[str]:
    """Return the path of ``termux-microphone-record``, or None.

    None is returned when not running under Termux or when the executable is
    not found on PATH.
    """
    return shutil.which("termux-microphone-record") if _is_termux_environment() else None


def _termux_media_player_command() -> Optional[str]:
    """Return the path of ``termux-media-player``, or None.

    None is returned when not running under Termux or when the executable is
    not found on PATH.
    """
    return shutil.which("termux-media-player") if _is_termux_environment() else None


def _termux_voice_capture_available() -> bool:
    """Report whether the Termux microphone command could be located."""
    mic_command = _termux_microphone_command()
    return mic_command is not None
# ============================================================================
# Termux Audio Recorder
# ============================================================================
class TermuxAudioRecorder:
    """Recorder backend that uses Termux:API microphone capture commands.

    Capture is delegated to the executable resolved by
    ``_termux_microphone_command()``: ``start()`` launches a recording into an
    ``.aac`` file under ``_TEMP_DIR`` and ``stop()`` / ``cancel()`` end it with
    ``-q``.  No audio samples pass through this process, so ``current_rms``
    always reports 0 and silence-based auto-stop is not supported.
    """

    # Callers read this flag to decide whether to advertise auto-stop.
    supports_silence_autostop = False

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._recording = False
        self._start_time = 0.0
        self._recording_path: Optional[str] = None
        self._current_rms = 0

    @property
    def is_recording(self) -> bool:
        """True between a successful ``start()`` and the next ``stop()``/``cancel()``."""
        return self._recording

    @property
    def elapsed_seconds(self) -> float:
        """Seconds since the capture started, or 0.0 when not recording."""
        if not self._recording:
            return 0.0
        return time.monotonic() - self._start_time

    @property
    def current_rms(self) -> int:
        """Audio level placeholder; this backend never sets it to anything but 0."""
        return self._current_rms

    @staticmethod
    def _discard_file(path: Optional[str]) -> None:
        """Best-effort removal of a recording file; a missing file is not an error."""
        if not path:
            return
        try:
            os.unlink(path)
        except OSError:
            pass

    def _forget_failed_start(self, path: str) -> None:
        """Drop the bookkeeping and partial file left behind by a failed start."""
        with self._lock:
            # Only clear the path if no other start() has replaced it meanwhile.
            if self._recording_path == path:
                self._recording_path = None
        self._discard_file(path)

    def start(self, on_silence_stop=None) -> None:
        """Begin a Termux:API capture into a fresh file under ``_TEMP_DIR``.

        Does nothing if a recording is already in progress.

        Args:
            on_silence_stop: Accepted and ignored; this backend cannot detect
                silence.

        Raises:
            RuntimeError: If the Termux microphone command is unavailable or
                the capture command fails to launch.
        """
        del on_silence_stop  # Termux:API does not expose live silence callbacks.
        mic_cmd = _termux_microphone_command()
        if not mic_cmd:
            raise RuntimeError(
                "Termux voice capture requires the termux-api package and app.\n"
                "Install with: pkg install termux-api\n"
                "Then install/update the Termux:API Android app."
            )

        with self._lock:
            if self._recording:
                return
            os.makedirs(_TEMP_DIR, exist_ok=True)
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            path = os.path.join(_TEMP_DIR, f"recording_{timestamp}.aac")
            self._recording_path = path

        command = [
            mic_cmd,
            "-f", path,
            # NOTE(review): "-l 0" is expected to mean "no duration limit" --
            # confirm against the termux-microphone-record documentation.
            "-l", "0",
            "-e", "aac",
            "-r", str(SAMPLE_RATE),
            "-c", str(CHANNELS),
        ]
        try:
            subprocess.run(command, capture_output=True, text=True, timeout=15, check=True)
        except subprocess.CalledProcessError as e:
            # A non-zero exit is treated as "no capture running": forget the
            # stale path and remove any partial file so nothing leaks.
            self._forget_failed_start(path)
            details = (e.stderr or e.stdout or str(e)).strip()
            raise RuntimeError(f"Termux microphone start failed: {details}") from e
        except Exception as e:
            # Timeout or launch error: a capture may still be running, so
            # _recording_path is kept and a later cancel()/shutdown() can stop
            # it and delete the file.
            raise RuntimeError(f"Termux microphone start failed: {e}") from e

        with self._lock:
            self._start_time = time.monotonic()
            self._recording = True
            self._current_rms = 0
        logger.info("Termux voice recording started")

    def _stop_termux_recording(self) -> None:
        """Ask Termux:API to end the capture (``-q``); no-op if the command is gone."""
        mic_cmd = _termux_microphone_command()
        if not mic_cmd:
            return
        subprocess.run([mic_cmd, "-q"], capture_output=True, text=True, timeout=15, check=False)

    def stop(self) -> Optional[str]:
        """End the capture and return the recorded file path.

        Returns:
            The path of the ``.aac`` file, or None when nothing was being
            recorded, the file is missing or empty, or the capture lasted less
            than 0.3 seconds (the file is deleted in the last two cases).
        """
        with self._lock:
            if not self._recording:
                return None
            self._recording = False
            path = self._recording_path
            self._recording_path = None
            started_at = self._start_time
            self._current_rms = 0

        self._stop_termux_recording()
        if not path or not os.path.isfile(path):
            return None
        too_short = time.monotonic() - started_at < 0.3
        if too_short or os.path.getsize(path) <= 0:
            self._discard_file(path)
            return None
        logger.info("Termux voice recording stopped: %s", path)
        return path

    def cancel(self) -> None:
        """Abort the capture, if any, and delete its file.

        When the recorder is idle (not recording and no pending file) this
        returns immediately instead of spawning the stop command, so
        ``shutdown()`` of an unused recorder cannot block on Termux:API or end
        a capture this instance did not start.
        """
        with self._lock:
            path = self._recording_path
            had_capture = self._recording or path is not None
            self._recording = False
            self._recording_path = None
            self._current_rms = 0
        if not had_capture:
            return
        try:
            self._stop_termux_recording()
        except Exception:
            # Cancelling stays best-effort, but the failure is no longer silent.
            logger.debug("Termux microphone stop failed during cancel", exc_info=True)
        self._discard_file(path)
        logger.info("Termux voice recording cancelled")

    def shutdown(self) -> None:
        """Release the recorder; equivalent to ``cancel()``."""
        self.cancel()


# The annotation is quoted so it is not evaluated at import time: ``X | Y``
# between classes needs Python 3.10+, and this module otherwise sticks to
# ``typing``-style hints.
def create_audio_recorder() -> "AudioRecorder | TermuxAudioRecorder":
    """Return the best recorder backend for the current environment.

    Termux:API capture is preferred whenever its microphone command is
    available; otherwise the regular ``AudioRecorder`` is used.
    """
    if _termux_voice_capture_available():
        return TermuxAudioRecorder()
    return AudioRecorder()