fix(termux): deepen browser, voice, and tui support

This commit is contained in:
adybag14-cyber 2026-04-09 14:16:58 +02:00 committed by Teknium
parent 3237733ca5
commit 769ec1ee1a
6 changed files with 358 additions and 24 deletions

View file

@ -296,6 +296,17 @@ def _browser_install_hint() -> str:
return "npm install -g agent-browser && agent-browser install --with-deps"
def _requires_real_termux_browser_install(browser_cmd: str) -> bool:
    """Tell whether *browser_cmd* is the bare npx fallback on Termux local mode.

    The result is truthy only when all three hold: the process runs under
    Termux, the browser tool is in local mode, and the resolved command
    (ignoring surrounding whitespace) is exactly ``npx agent-browser``.
    """
    # Environment checks run first and short-circuit, so the command string
    # is only inspected when both of them pass.
    on_termux_local = _is_termux_environment() and _is_local_mode()
    return on_termux_local and browser_cmd.strip() == "npx agent-browser"
def _termux_browser_install_error() -> str:
    """Build the error message shown when Termux only has the npx fallback.

    The message ends with the install command from ``_browser_install_hint()``.
    """
    install_hint = _browser_install_hint()
    reason = "Local browser automation on Termux cannot rely on the bare npx fallback. "
    return f"{reason}Install agent-browser explicitly first: {install_hint}"
def _is_local_mode() -> bool:
"""Return True when the browser tool will use a local browser backend."""
if _get_cdp_override():
@ -864,6 +875,11 @@ def _run_browser_command(
except FileNotFoundError as e:
logger.warning("agent-browser CLI not found: %s", e)
return {"success": False, "error": str(e)}
if _requires_real_termux_browser_install(browser_cmd):
error = _termux_browser_install_error()
logger.warning("browser command blocked on Termux: %s", error)
return {"success": False, "error": error}
from tools.interrupt import is_interrupted
if is_interrupted():
@ -2060,7 +2076,7 @@ def check_browser_requirements() -> bool:
# local browser dependency. Require a real install (global or local) so the
# browser tool is not advertised as available when it will likely fail on
# first use.
if _is_termux_environment() and _is_local_mode() and browser_cmd.strip() == "npx agent-browser":
if _requires_real_termux_browser_install(browser_cmd):
return False
# In cloud mode, also require provider credentials
@ -2092,10 +2108,13 @@ if __name__ == "__main__":
else:
print("❌ Missing requirements:")
try:
_find_agent_browser()
browser_cmd = _find_agent_browser()
if _requires_real_termux_browser_install(browser_cmd):
print(" - bare npx fallback found (insufficient on Termux local mode)")
print(f" Install: {_browser_install_hint()}")
except FileNotFoundError:
print(" - agent-browser CLI not found")
print(" Install: npm install -g agent-browser && agent-browser install --with-deps")
print(f" Install: {_browser_install_hint()}")
if _cp is not None and not _cp.is_configured():
print(f" - {_cp.provider_name()} credentials not configured")
print(" Tip: set browser.cloud_provider to 'local' to use free local mode instead")

View file

@ -59,6 +59,22 @@ def _voice_capture_install_hint() -> str:
return "pip install sounddevice numpy"
def _termux_microphone_command() -> Optional[str]:
    """Locate the ``termux-microphone-record`` executable.

    Returns:
        The path found on ``PATH`` when running under Termux, otherwise
        ``None`` (also ``None`` when the executable is not found).
    """
    return shutil.which("termux-microphone-record") if _is_termux_environment() else None
def _termux_media_player_command() -> Optional[str]:
    """Locate the ``termux-media-player`` executable.

    Returns:
        The path found on ``PATH`` when running under Termux, otherwise
        ``None`` (also ``None`` when the executable is not found).
    """
    return shutil.which("termux-media-player") if _is_termux_environment() else None
def _termux_voice_capture_available() -> bool:
    """Return True when the Termux microphone recording command can be located."""
    mic_cmd = _termux_microphone_command()
    return mic_cmd is not None
def detect_audio_environment() -> dict:
"""Detect if the current environment supports audio I/O.
@ -68,6 +84,7 @@ def detect_audio_environment() -> dict:
"""
warnings = [] # hard-fail: these block voice mode
notices = [] # informational: logged but don't block
termux_capture = _termux_voice_capture_available()
# SSH detection
if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')):
@ -100,18 +117,28 @@ def detect_audio_environment() -> dict:
try:
devices = sd.query_devices()
if not devices:
warnings.append("No audio input/output devices detected")
if termux_capture:
notices.append("No PortAudio devices detected, but Termux:API microphone capture is available")
else:
warnings.append("No audio input/output devices detected")
except Exception:
# In WSL with PulseAudio, device queries can fail even though
# recording/playback works fine. Don't block if PULSE_SERVER is set.
if os.environ.get('PULSE_SERVER'):
notices.append("Audio device query failed but PULSE_SERVER is set -- continuing")
elif termux_capture:
notices.append("PortAudio device query failed, but Termux:API microphone capture is available")
else:
warnings.append("Audio subsystem error (PortAudio cannot query devices)")
except ImportError:
warnings.append(f"Audio libraries not installed ({_voice_capture_install_hint()})")
if termux_capture:
notices.append("Termux:API microphone recording available (sounddevice not required)")
else:
warnings.append(f"Audio libraries not installed ({_voice_capture_install_hint()})")
except OSError:
if _is_termux_environment():
if termux_capture:
notices.append("Termux:API microphone recording available (PortAudio not required)")
elif _is_termux_environment():
warnings.append(
"PortAudio system library not found -- install it first:\n"
" Termux: pkg install portaudio\n"
@ -192,6 +219,129 @@ def play_beep(frequency: int = 880, duration: float = 0.12, count: int = 1) -> N
logger.debug("Beep playback failed: %s", e)
# ============================================================================
# Termux Audio Recorder
# ============================================================================
class TermuxAudioRecorder:
    """Recorder backend that uses Termux:API microphone capture commands.

    Recording is delegated to the external ``termux-microphone-record``
    command: ``start()`` launches a capture into an ``.aac`` file under
    ``_TEMP_DIR`` and ``stop()`` / ``cancel()`` ask the command to quit.
    No audio samples pass through this process, so ``current_rms`` is
    always 0 and silence-based auto-stop is not supported.
    """

    # Termux:API does not expose live audio levels, so callers must not rely
    # on a silence callback firing for this backend.
    supports_silence_autostop = False

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._recording = False
        self._start_time = 0.0
        self._recording_path: Optional[str] = None
        self._current_rms = 0

    @property
    def is_recording(self) -> bool:
        """Whether a recording started by this object is currently active."""
        return self._recording

    @property
    def elapsed_seconds(self) -> float:
        """Seconds since the active recording started, or 0.0 when idle."""
        if not self._recording:
            return 0.0
        return time.monotonic() - self._start_time

    @property
    def current_rms(self) -> int:
        """Audio level placeholder; this backend only ever stores 0."""
        return self._current_rms

    @staticmethod
    def _discard_file(path: Optional[str]) -> None:
        """Best-effort removal of a recording file; missing files are ignored."""
        if not path:
            return
        try:
            os.unlink(path)
        except OSError:
            pass

    def start(self, on_silence_stop=None) -> None:
        """Begin recording to a fresh timestamped file in ``_TEMP_DIR``.

        Args:
            on_silence_stop: Accepted for interface compatibility and ignored.

        Raises:
            RuntimeError: If the Termux microphone command cannot be located
                or the start command fails.
        """
        del on_silence_stop  # Termux:API does not expose live silence callbacks.
        mic_cmd = _termux_microphone_command()
        if not mic_cmd:
            raise RuntimeError(
                "Termux voice capture requires the termux-api package and app.\n"
                "Install with: pkg install termux-api\n"
                "Then install/update the Termux:API Android app."
            )

        with self._lock:
            if self._recording:
                return

        os.makedirs(_TEMP_DIR, exist_ok=True)
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        recording_path = os.path.join(_TEMP_DIR, f"recording_{timestamp}.aac")

        # Flags per termux-microphone-record usage: -f output file, -l limit
        # in seconds (0 = no limit), -e encoder, -r sample rate, -c channels.
        command = [
            mic_cmd,
            "-f", recording_path,
            "-l", "0",
            "-e", "aac",
            "-r", str(SAMPLE_RATE),
            "-c", str(CHANNELS),
        ]
        try:
            subprocess.run(command, capture_output=True, text=True, timeout=15, check=True)
        except subprocess.CalledProcessError as e:
            # Do not leave a partial file or a stale path behind on failure.
            self._discard_file(recording_path)
            details = (e.stderr or e.stdout or str(e)).strip()
            raise RuntimeError(f"Termux microphone start failed: {details}") from e
        except Exception as e:
            self._discard_file(recording_path)
            raise RuntimeError(f"Termux microphone start failed: {e}") from e

        # Publish the whole recording state together, only after the command
        # has succeeded, so readers never see a path without a recording.
        with self._lock:
            self._recording_path = recording_path
            self._start_time = time.monotonic()
            self._recording = True
            self._current_rms = 0
        logger.info("Termux voice recording started")

    def _stop_termux_recording(self) -> None:
        """Ask the Termux microphone command to quit; its exit code is ignored."""
        mic_cmd = _termux_microphone_command()
        if not mic_cmd:
            return
        subprocess.run([mic_cmd, "-q"], capture_output=True, text=True, timeout=15, check=False)

    def stop(self) -> Optional[str]:
        """Stop the active recording and return the path of the audio file.

        Returns:
            The recording path, or ``None`` when nothing was being recorded,
            the file is missing or empty, or less than 0.3 seconds elapsed
            between start and the end of the stop command (unusable files
            are deleted).
        """
        with self._lock:
            if not self._recording:
                return None
            self._recording = False
            path = self._recording_path
            self._recording_path = None
            started_at = self._start_time
            self._current_rms = 0

        self._stop_termux_recording()
        if not path or not os.path.isfile(path):
            return None

        too_short = time.monotonic() - started_at < 0.3
        try:
            is_empty = os.path.getsize(path) <= 0
        except OSError:
            # File vanished between the isfile check and here: nothing to return.
            return None
        if too_short or is_empty:
            self._discard_file(path)
            return None

        logger.info("Termux voice recording stopped: %s", path)
        return path

    def cancel(self) -> None:
        """Abort any recording and delete its file; never raises on stop failure."""
        with self._lock:
            path = self._recording_path
            self._recording = False
            self._recording_path = None
            self._current_rms = 0
        try:
            self._stop_termux_recording()
        except Exception as e:
            # Cancellation is best-effort, but keep a trace for debugging.
            logger.debug("Termux microphone stop failed during cancel: %s", e)
        if path and os.path.isfile(path):
            self._discard_file(path)
        logger.info("Termux voice recording cancelled")

    def shutdown(self) -> None:
        """Release the backend; equivalent to ``cancel()``."""
        self.cancel()
# ============================================================================
# AudioRecorder
# ============================================================================
@ -211,6 +361,8 @@ class AudioRecorder:
the user is silent for ``silence_duration`` seconds and calls the callback.
"""
supports_silence_autostop = True
def __init__(self) -> None:
self._lock = threading.Lock()
self._stream: Any = None
@ -544,6 +696,13 @@ class AudioRecorder:
return wav_path
def create_audio_recorder() -> AudioRecorder | TermuxAudioRecorder:
    """Build the recorder backend suited to the current environment.

    The Termux:API recorder is chosen whenever Termux microphone capture is
    available; otherwise the default ``AudioRecorder`` is used.
    """
    backend_cls = TermuxAudioRecorder if _termux_voice_capture_available() else AudioRecorder
    return backend_cls()
# ============================================================================
# Whisper hallucination filter
# ============================================================================
@ -752,7 +911,8 @@ def check_voice_requirements() -> Dict[str, Any]:
stt_available = stt_enabled and stt_provider != "none"
missing: List[str] = []
has_audio = _audio_available()
termux_capture = _termux_voice_capture_available()
has_audio = _audio_available() or termux_capture
if not has_audio:
missing.extend(["sounddevice", "numpy"])
@ -763,7 +923,9 @@ def check_voice_requirements() -> Dict[str, Any]:
available = has_audio and stt_available and env_check["available"]
details_parts = []
if has_audio:
if termux_capture:
details_parts.append("Audio capture: OK (Termux:API microphone)")
elif has_audio:
details_parts.append("Audio capture: OK")
else:
details_parts.append(f"Audio capture: MISSING ({_voice_capture_install_hint()})")