From 9ba5d399e58fa353d7132b0e2e9533d281ff64d1 Mon Sep 17 00:00:00 2001
From: stablegenius49 <16443023+stablegenius49@users.noreply.github.com>
Date: Sat, 7 Mar 2026 13:43:08 -0800
Subject: [PATCH 1/3] fix: restore missing MIT license file

---
 LICENSE | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000..75410e7331
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Nous Research
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

From 3830bbda41e21cb1953a60bd652c7cb7aa4a257a Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Sat, 7 Mar 2026 18:07:36 -0800
Subject: [PATCH 2/3] fix: include url in web_extract trimmed results & fix
 docs

The web_extract_tool was stripping the 'url' key during its output
trimming step, but documentation in 3 places claimed it was present.
This caused KeyError when accessing result['url'] in execute_code
scripts, especially when extracting from multiple URLs.

Changes:
- web_tools.py: Add 'url' back to trimmed_results output
- code_execution_tool.py: Add 'title' to _TOOL_STUBS docstring and
  _TOOL_DOC_LINES so docs match actual {url, title, content, error}
  response format
---
 tools/code_execution_tool.py | 4 ++--
 tools/web_tools.py           | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 9f0b658ca1..0d3f176091 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -78,7 +78,7 @@ _TOOL_STUBS = {
     "web_extract": (
         "web_extract",
         "urls: list",
-        '"""Extract content from URLs. Returns dict with results list of {url, content, error}."""',
+        '"""Extract content from URLs. Returns dict with results list of {url, title, content, error}."""',
         '{"urls": urls}',
     ),
     "read_file": (
@@ -605,7 +605,7 @@ _TOOL_DOC_LINES = [
      "    Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"),
     ("web_extract",
      "  web_extract(urls: list[str]) -> dict\n"
-     "    Returns {\"results\": [{\"url\", \"content\", \"error\"}, ...]} where content is markdown"),
+     "    Returns {\"results\": [{\"url\", \"title\", \"content\", \"error\"}, ...]} where content is markdown"),
     ("read_file",
      "  read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n"
      "    Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}"),
diff --git a/tools/web_tools.py b/tools/web_tools.py
index 5bf223425c..0fd0f4107b 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -787,6 +787,7 @@ async def web_extract_tool(
         # Trim output to minimal fields per entry: title, content, error
         trimmed_results = [
             {
+                "url": r.get("url", ""),
                 "title": r.get("title", ""),
                 "content": r.get("content", ""),
                 "error": r.get("error"),

From c7b6f423c713d4b54af26d559d1853ec948cfad5 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Sat, 7 Mar 2026 20:09:48 -0800
Subject: [PATCH 3/3] feat: auto-compress pathologically large gateway sessions
 (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.
---
 gateway/run.py                        | 161 ++++++++++++++++++++++++++
 tests/gateway/test_session_hygiene.py | 159 +++++++++++++++++++++++++
 2 files changed, 320 insertions(+)
 create mode 100644 tests/gateway/test_session_hygiene.py

diff --git a/gateway/run.py b/gateway/run.py
index be64d13a1b..8a89e0fbe0 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -831,6 +831,167 @@ class GatewayRunner:
         # Load conversation history from transcript
         history = self.session_store.load_transcript(session_entry.session_id)
         
+        # -----------------------------------------------------------------
+        # Session hygiene: auto-compress pathologically large transcripts
+        #
+        # Long-lived gateway sessions can accumulate enough history that
+        # every new message rehydrates an oversized transcript, causing
+        # repeated truncation/context failures.  Detect this early and
+        # compress proactively — before the agent even starts.  (#628)
+        # -----------------------------------------------------------------
+        if history and len(history) >= 4:
+            from agent.model_metadata import estimate_messages_tokens_rough
+
+            # Read thresholds from config.yaml → session_hygiene section
+            _hygiene_cfg = {}
+            try:
+                _hyg_cfg_path = _hermes_home / "config.yaml"
+                if _hyg_cfg_path.exists():
+                    import yaml as _hyg_yaml
+                    with open(_hyg_cfg_path) as _hyg_f:
+                        _hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
+                    _hygiene_cfg = _hyg_data.get("session_hygiene", {})
+                    if not isinstance(_hygiene_cfg, dict):
+                        _hygiene_cfg = {}
+            except Exception:
+                pass
+
+            _compress_token_threshold = int(
+                _hygiene_cfg.get("auto_compress_tokens", 100_000)
+            )
+            _compress_msg_threshold = int(
+                _hygiene_cfg.get("auto_compress_messages", 200)
+            )
+            _warn_token_threshold = int(
+                _hygiene_cfg.get("warn_tokens", 200_000)
+            )
+
+            _msg_count = len(history)
+            _approx_tokens = estimate_messages_tokens_rough(history)
+
+            _needs_compress = (
+                _approx_tokens >= _compress_token_threshold
+                or _msg_count >= _compress_msg_threshold
+            )
+
+            if _needs_compress:
+                logger.info(
+                    "Session hygiene: %s messages, ~%s tokens — auto-compressing "
+                    "(thresholds: %s msgs / %s tokens)",
+                    _msg_count, f"{_approx_tokens:,}",
+                    _compress_msg_threshold, f"{_compress_token_threshold:,}",
+                )
+
+                _hyg_adapter = self.adapters.get(source.platform)
+                if _hyg_adapter:
+                    try:
+                        await _hyg_adapter.send(
+                            source.chat_id,
+                            f"🗜️ Session is large ({_msg_count} messages, "
+                            f"~{_approx_tokens:,} tokens). Auto-compressing..."
+                        )
+                    except Exception:
+                        pass
+
+                try:
+                    from run_agent import AIAgent
+
+                    _hyg_runtime = _resolve_runtime_agent_kwargs()
+                    if _hyg_runtime.get("api_key"):
+                        _hyg_msgs = [
+                            {"role": m.get("role"), "content": m.get("content")}
+                            for m in history
+                            if m.get("role") in ("user", "assistant")
+                            and m.get("content")
+                        ]
+
+                        if len(_hyg_msgs) >= 4:
+                            _hyg_agent = AIAgent(
+                                **_hyg_runtime,
+                                max_iterations=4,
+                                quiet_mode=True,
+                                enabled_toolsets=["memory"],
+                                session_id=session_entry.session_id,
+                            )
+
+                            loop = asyncio.get_event_loop()
+                            _compressed, _ = await loop.run_in_executor(
+                                None,
+                                lambda: _hyg_agent._compress_context(
+                                    _hyg_msgs, "",
+                                    approx_tokens=_approx_tokens,
+                                ),
+                            )
+
+                            self.session_store.rewrite_transcript(
+                                session_entry.session_id, _compressed
+                            )
+                            history = _compressed
+                            _new_count = len(_compressed)
+                            _new_tokens = estimate_messages_tokens_rough(
+                                _compressed
+                            )
+
+                            logger.info(
+                                "Session hygiene: compressed %s → %s msgs, "
+                                "~%s → ~%s tokens",
+                                _msg_count, _new_count,
+                                f"{_approx_tokens:,}", f"{_new_tokens:,}",
+                            )
+
+                            if _hyg_adapter:
+                                try:
+                                    await _hyg_adapter.send(
+                                        source.chat_id,
+                                        f"🗜️ Compressed: {_msg_count} → "
+                                        f"{_new_count} messages, "
+                                        f"~{_approx_tokens:,} → "
+                                        f"~{_new_tokens:,} tokens"
+                                    )
+                                except Exception:
+                                    pass
+
+                            # Still too large after compression — warn user
+                            if _new_tokens >= _warn_token_threshold:
+                                logger.warning(
+                                    "Session hygiene: still ~%s tokens after "
+                                    "compression — suggesting /reset",
+                                    f"{_new_tokens:,}",
+                                )
+                                if _hyg_adapter:
+                                    try:
+                                        await _hyg_adapter.send(
+                                            source.chat_id,
+                                            "⚠️ Session is still very large "
+                                            "after compression "
+                                            f"(~{_new_tokens:,} tokens). "
+                                            "Consider using /reset to start "
+                                            "fresh if you experience issues."
+                                        )
+                                    except Exception:
+                                        pass
+
+                except Exception as e:
+                    logger.warning(
+                        "Session hygiene auto-compress failed: %s", e
+                    )
+                    # Compression failed and session is dangerously large
+                    if _approx_tokens >= _warn_token_threshold:
+                        _hyg_adapter = self.adapters.get(source.platform)
+                        if _hyg_adapter:
+                            try:
+                                await _hyg_adapter.send(
+                                    source.chat_id,
+                                    f"⚠️ Session is very large "
+                                    f"({_msg_count} messages, "
+                                    f"~{_approx_tokens:,} tokens) and "
+                                    "auto-compression failed. Consider "
+                                    "using /compress or /reset to avoid "
+                                    "issues."
+                                )
+                            except Exception:
+                                pass
+
         # First-message onboarding -- only on the very first interaction ever
         if not history and not self.session_store.has_any_sessions():
             context_prompt += (
diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py
new file mode 100644
index 0000000000..b357d58619
--- /dev/null
+++ b/tests/gateway/test_session_hygiene.py
@@ -0,0 +1,159 @@
+"""Tests for gateway session hygiene — auto-compression of large sessions.
+
+Checks the threshold comparisons and rough token estimates that the gateway
+uses to decide on auto-compression; the gateway itself is not invoked.  (#628)
+"""
+
+import pytest
+from unittest.mock import patch, MagicMock, AsyncMock
+from agent.model_metadata import estimate_messages_tokens_rough
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_history(n_messages: int, content_size: int = 100) -> list:
+    """Build a fake transcript of n_messages entries with alternating user/assistant roles."""
+    history = []
+    content = "x" * content_size
+    for i in range(n_messages):
+        role = "user" if i % 2 == 0 else "assistant"
+        history.append({"role": role, "content": content, "timestamp": f"t{i}"})
+    return history
+
+
+def _make_large_history_tokens(target_tokens: int) -> list:
+    """Build a history that estimates to roughly target_tokens tokens."""
+    # estimate_messages_tokens_rough counts total chars in str(msg) // 4
+    # Each msg dict has ~60 chars of overhead + content chars
+    # So for N tokens we need roughly N * 4 total chars across all messages
+    target_chars = target_tokens * 4
+    # Each message as a dict string is roughly len(content) + 60 chars
+    msg_overhead = 60
+    # Use 50 messages with appropriately sized content
+    n_msgs = 50
+    content_size = max(10, (target_chars // n_msgs) - msg_overhead)
+    return _make_history(n_msgs, content_size=content_size)
+
+
+# ---------------------------------------------------------------------------
+# Detection threshold tests
+# ---------------------------------------------------------------------------
+
+class TestSessionHygieneThresholds:
+    """Test the large-session threshold comparison, re-implemented inline in each test."""
+
+    def test_small_session_below_thresholds(self):
+        """A 10-message session should not trigger compression."""
+        history = _make_history(10)
+        msg_count = len(history)
+        approx_tokens = estimate_messages_tokens_rough(history)
+
+        compress_token_threshold = 100_000
+        compress_msg_threshold = 200
+
+        needs_compress = (
+            approx_tokens >= compress_token_threshold
+            or msg_count >= compress_msg_threshold
+        )
+        assert not needs_compress
+
+    def test_large_message_count_triggers(self):
+        """200+ messages should trigger compression even if tokens are low."""
+        history = _make_history(250, content_size=10)
+        msg_count = len(history)
+
+        compress_msg_threshold = 200
+        needs_compress = msg_count >= compress_msg_threshold
+        assert needs_compress
+
+    def test_large_token_count_triggers(self):
+        """High token count should trigger compression even if message count is low."""
+        # 50 messages with huge content to exceed 100K tokens
+        history = _make_history(50, content_size=10_000)
+        approx_tokens = estimate_messages_tokens_rough(history)
+
+        compress_token_threshold = 100_000
+        needs_compress = approx_tokens >= compress_token_threshold
+        assert needs_compress
+
+    def test_under_both_thresholds_no_trigger(self):
+        """Session under both thresholds should not trigger."""
+        history = _make_history(100, content_size=100)
+        msg_count = len(history)
+        approx_tokens = estimate_messages_tokens_rough(history)
+
+        compress_token_threshold = 100_000
+        compress_msg_threshold = 200
+
+        needs_compress = (
+            approx_tokens >= compress_token_threshold
+            or msg_count >= compress_msg_threshold
+        )
+        assert not needs_compress
+
+    def test_custom_thresholds(self):
+        """Custom thresholds from config should be respected."""
+        history = _make_history(60, content_size=100)
+        msg_count = len(history)
+
+        # Custom lower threshold
+        compress_msg_threshold = 50
+        needs_compress = msg_count >= compress_msg_threshold
+        assert needs_compress
+
+        # Custom higher threshold
+        compress_msg_threshold = 100
+        needs_compress = msg_count >= compress_msg_threshold
+        assert not needs_compress
+
+    def test_minimum_message_guard(self):
+        """Sessions with fewer than 4 messages should never trigger."""
+        history = _make_history(3, content_size=100_000)
+        # Even with enormous content, < 4 messages should be skipped
+        # (the gateway code checks `len(history) >= 4` before evaluating)
+        assert len(history) < 4
+
+
+class TestSessionHygieneWarnThreshold:
+    """Test the post-compression warning threshold."""
+
+    def test_warn_when_still_large(self):
+        """If compressed result is still above warn_tokens, should warn."""
+        # Simulate post-compression tokens
+        warn_threshold = 200_000
+        post_compress_tokens = 250_000
+        assert post_compress_tokens >= warn_threshold
+
+    def test_no_warn_when_under(self):
+        """If compressed result is under warn_tokens, no warning."""
+        warn_threshold = 200_000
+        post_compress_tokens = 150_000
+        assert post_compress_tokens < warn_threshold
+
+
+class TestTokenEstimation:
+    """Verify rough token estimation works as expected for hygiene checks."""
+
+    def test_empty_history(self):
+        assert estimate_messages_tokens_rough([]) == 0
+
+    def test_proportional_to_content(self):
+        small = _make_history(10, content_size=100)
+        large = _make_history(10, content_size=10_000)
+        assert estimate_messages_tokens_rough(large) > estimate_messages_tokens_rough(small)
+
+    def test_proportional_to_count(self):
+        few = _make_history(10, content_size=1000)
+        many = _make_history(100, content_size=1000)
+        assert estimate_messages_tokens_rough(many) > estimate_messages_tokens_rough(few)
+
+    def test_pathological_session_detected(self):
+        """The reported pathological case: 648 messages, ~299K tokens."""
+        # Simulate a 648-message session averaging ~460 tokens per message
+        history = _make_history(648, content_size=1800)
+        tokens = estimate_messages_tokens_rough(history)
+        # Should be well above the 100K default threshold
+        assert tokens > 100_000
+        assert len(history) > 200