mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
fix(feishu): finalize remote document downloads inside httpx.AsyncClient context (#18502)
Snapshot Content-Type and body while the client context is still active so pooled connections fully release on exit. Previously the read happened after `async with httpx.AsyncClient(...)` returned — which works today only because httpx eagerly buffers non-streaming responses; a future refactor to `.stream()` would silently read- after-close. Part of the #18451 connection-hygiene audit. Salvage of #18502.
This commit is contained in:
parent
e444d8f29c
commit
38dd057e91
2 changed files with 70 additions and 2 deletions
|
|
@ -2922,13 +2922,18 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
# Snapshot Content-Type and body while the client context is
|
||||||
|
# still active so pooled connections fully release on exit.
|
||||||
|
# See #18451.
|
||||||
|
content_type_hdr = str(response.headers.get("Content-Type", ""))
|
||||||
|
body = response.content
|
||||||
filename = self._derive_remote_filename(
|
filename = self._derive_remote_filename(
|
||||||
file_url,
|
file_url,
|
||||||
content_type=str(response.headers.get("Content-Type", "")),
|
content_type=content_type_hdr,
|
||||||
default_name=preferred_name,
|
default_name=preferred_name,
|
||||||
default_ext=default_ext,
|
default_ext=default_ext,
|
||||||
)
|
)
|
||||||
cached_path = cache_document_from_bytes(response.content, filename)
|
cached_path = cache_document_from_bytes(body, filename)
|
||||||
return cached_path, filename
|
return cached_path, filename
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
||||||
|
|
@ -1771,6 +1771,69 @@ class TestAdapterBehavior(unittest.TestCase):
|
||||||
self.assertIn("GIF downgraded to file", caption)
|
self.assertIn("GIF downgraded to file", caption)
|
||||||
self.assertIn("look", caption)
|
self.assertIn("look", caption)
|
||||||
|
|
||||||
|
def test_download_remote_document_reads_response_before_httpx_client_closes(self):
|
||||||
|
"""#18451 — snapshot Content-Type + body while the httpx.AsyncClient
|
||||||
|
context is still active so pooled connections fully release on
|
||||||
|
exit. Otherwise the response is only readable because httpx
|
||||||
|
eagerly buffers it; a future refactor to .stream() would silently
|
||||||
|
read-after-close."""
|
||||||
|
from gateway.config import PlatformConfig
|
||||||
|
from gateway.platforms.feishu import FeishuAdapter
|
||||||
|
|
||||||
|
events: list[str] = []
|
||||||
|
|
||||||
|
class _FakeResponse:
|
||||||
|
headers = {"Content-Type": "application/octet-stream"}
|
||||||
|
|
||||||
|
def raise_for_status(self) -> None:
|
||||||
|
events.append("raise_for_status")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def content(self) -> bytes:
|
||||||
|
events.append("content_read")
|
||||||
|
return b"doc-bytes"
|
||||||
|
|
||||||
|
class _FakeAsyncClient:
|
||||||
|
def __init__(self, *_a: object, **_k: object) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
async def __aenter__(self) -> "_FakeAsyncClient":
|
||||||
|
events.append("client_enter")
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def __aexit__(self, *exc: object) -> None:
|
||||||
|
events.append("client_exit")
|
||||||
|
|
||||||
|
async def get(self, *_a: object, **_k: object) -> _FakeResponse:
|
||||||
|
events.append("get")
|
||||||
|
return _FakeResponse()
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
with patch.dict(os.environ, {"HERMES_HOME": tmp}, clear=False):
|
||||||
|
adapter = FeishuAdapter(PlatformConfig())
|
||||||
|
|
||||||
|
async def _run() -> tuple[str, str]:
|
||||||
|
with patch("tools.url_safety.is_safe_url", return_value=True):
|
||||||
|
with patch("httpx.AsyncClient", _FakeAsyncClient):
|
||||||
|
with patch(
|
||||||
|
"gateway.platforms.feishu.cache_document_from_bytes",
|
||||||
|
return_value="/tmp/cached-doc.bin",
|
||||||
|
):
|
||||||
|
return await adapter._download_remote_document(
|
||||||
|
"https://example.com/doc.bin",
|
||||||
|
default_ext=".bin",
|
||||||
|
preferred_name="doc",
|
||||||
|
)
|
||||||
|
|
||||||
|
path, filename = asyncio.run(_run())
|
||||||
|
|
||||||
|
self.assertEqual(path, "/tmp/cached-doc.bin")
|
||||||
|
self.assertTrue(filename)
|
||||||
|
# content_read MUST happen before client_exit — otherwise we're
|
||||||
|
# reading response body after the connection pool has been torn
|
||||||
|
# down, which only works by accident (httpx's eager buffering).
|
||||||
|
self.assertLess(events.index("content_read"), events.index("client_exit"))
|
||||||
|
|
||||||
def test_dedup_state_persists_across_adapter_restart(self):
|
def test_dedup_state_persists_across_adapter_restart(self):
|
||||||
from gateway.config import PlatformConfig
|
from gateway.config import PlatformConfig
|
||||||
from gateway.platforms.feishu import FeishuAdapter
|
from gateway.platforms.feishu import FeishuAdapter
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue