From 7d771c2b1bdfd142490c74b8a6ab5dcffad0aadc Mon Sep 17 00:00:00 2001 From: teknium1 Date: Wed, 11 Mar 2026 08:53:54 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20enhance=20Responses=20API=20=E2=80=94?= =?UTF-8?q?=20retrieval,=20deletion,=20tool=20calls,=20usage,=20CORS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cherry-picked from PR #828. --- gateway/platforms/api_server.py | 205 ++++++++++++++--- tests/gateway/test_api_server.py | 371 +++++++++++++++++++++++++++++-- 2 files changed, 526 insertions(+), 50 deletions(-) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 25e6db950c..9a6527dcdb 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -2,10 +2,12 @@ OpenAI-compatible API server platform adapter. Exposes an HTTP server with endpoints: -- POST /v1/chat/completions — OpenAI Chat Completions format (stateless) -- POST /v1/responses — OpenAI Responses API format (stateful via previous_response_id) -- GET /v1/models — lists hermes-agent as an available model -- GET /health — health check +- POST /v1/chat/completions — OpenAI Chat Completions format (stateless) +- POST /v1/responses — OpenAI Responses API format (stateful via previous_response_id) +- GET /v1/responses/{response_id} — Retrieve a stored response +- DELETE /v1/responses/{response_id} — Delete a stored response +- GET /v1/models — lists hermes-agent as an available model +- GET /health — health check Any OpenAI-compatible frontend (Open WebUI, LobeChat, etc.) can connect to hermes-agent through this adapter. @@ -82,10 +84,41 @@ class ResponseStore: while len(self._store) > self._max_size: self._store.popitem(last=False) + def delete(self, response_id: str) -> bool: + """Remove a response from the store. Returns True if found and deleted.""" + if response_id in self._store: + del self._store[response_id] + return True + return False + def __len__(self) -> int: return len(self._store) +# --------------------------------------------------------------------------- +# CORS middleware +# --------------------------------------------------------------------------- + +_CORS_HEADERS = { + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "GET, POST, DELETE, OPTIONS", + "Access-Control-Allow-Headers": "Authorization, Content-Type", +} + + +if AIOHTTP_AVAILABLE: + @web.middleware + async def cors_middleware(request, handler): + """Add CORS headers to every response; handle OPTIONS preflight.""" + if request.method == "OPTIONS": + return web.Response(status=200, headers=_CORS_HEADERS) + response = await handler(request) + response.headers.update(_CORS_HEADERS) + return response +else: + cors_middleware = None # type: ignore[assignment] + + class APIServerAdapter(BasePlatformAdapter): """ OpenAI-compatible HTTP API server adapter. @@ -271,7 +304,7 @@ class APIServerAdapter(BasePlatformAdapter): # Run the agent in an executor (run_conversation is synchronous) session_id = str(uuid.uuid4()) try: - result = await self._run_agent( + result, usage = await self._run_agent( user_message=user_message, conversation_history=history, ephemeral_system_prompt=system_prompt, @@ -305,9 +338,9 @@ class APIServerAdapter(BasePlatformAdapter): } ], "usage": { - "prompt_tokens": 0, - "completion_tokens": 0, - "total_tokens": 0, + "prompt_tokens": usage.get("input_tokens", 0), + "completion_tokens": usage.get("output_tokens", 0), + "total_tokens": usage.get("total_tokens", 0), }, } @@ -394,10 +427,14 @@ class APIServerAdapter(BasePlatformAdapter): status=400, ) + # Truncation support + if body.get("truncation") == "auto" and len(conversation_history) > 100: + conversation_history = conversation_history[-100:] + # Run the agent session_id = str(uuid.uuid4()) try: - result = await self._run_agent( + result, usage = await self._run_agent( user_message=user_message, conversation_history=conversation_history, ephemeral_system_prompt=instructions, @@ -428,15 +465,8 @@ class APIServerAdapter(BasePlatformAdapter): else: full_history.append({"role": "assistant", "content": final_response}) - # Store response for future chaining - if store: - self._response_store.put(response_id, { - "input": raw_input, - "output": final_response, - "conversation_history": full_history, - "instructions": instructions, - "created_at": created_at, - }) + # Build output items (includes tool calls + final message) + output_items = self._extract_output_items(result) response_data = { "id": response_id, @@ -444,42 +474,136 @@ class APIServerAdapter(BasePlatformAdapter): "status": "completed", "created_at": created_at, "model": body.get("model", "hermes-agent"), - "output": [ - { - "type": "message", - "role": "assistant", - "content": [ - { - "type": "output_text", - "text": final_response, - } - ], - } - ], + "output": output_items, "usage": { - "input_tokens": 0, - "output_tokens": 0, - "total_tokens": 0, + "input_tokens": usage.get("input_tokens", 0), + "output_tokens": usage.get("output_tokens", 0), + "total_tokens": usage.get("total_tokens", 0), }, } + # Store the complete response object for future chaining / GET retrieval + if store: + self._response_store.put(response_id, { + "response": response_data, + "conversation_history": full_history, + "instructions": instructions, + }) + return web.json_response(response_data) # ------------------------------------------------------------------ # Agent execution # ------------------------------------------------------------------ + # ------------------------------------------------------------------ + # GET / DELETE response endpoints + # ------------------------------------------------------------------ + + async def _handle_get_response(self, request: "web.Request") -> "web.Response": + """GET /v1/responses/{response_id} — retrieve a stored response.""" + auth_err = self._check_auth(request) + if auth_err: + return auth_err + + response_id = request.match_info["response_id"] + stored = self._response_store.get(response_id) + if stored is None: + return web.json_response( + {"error": {"message": f"Response not found: {response_id}", "type": "invalid_request_error"}}, + status=404, + ) + + return web.json_response(stored["response"]) + + async def _handle_delete_response(self, request: "web.Request") -> "web.Response": + """DELETE /v1/responses/{response_id} — delete a stored response.""" + auth_err = self._check_auth(request) + if auth_err: + return auth_err + + response_id = request.match_info["response_id"] + deleted = self._response_store.delete(response_id) + if not deleted: + return web.json_response( + {"error": {"message": f"Response not found: {response_id}", "type": "invalid_request_error"}}, + status=404, + ) + + return web.json_response({ + "id": response_id, + "object": "response", + "deleted": True, + }) + + # ------------------------------------------------------------------ + # Output extraction helper + # ------------------------------------------------------------------ + + @staticmethod + def _extract_output_items(result: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Build the full output item array from the agent's messages. + + Walks *result["messages"]* and emits: + - ``function_call`` items for each tool_call on assistant messages + - ``function_call_output`` items for each tool-role message + - a final ``message`` item with the assistant's text reply + """ + items: List[Dict[str, Any]] = [] + messages = result.get("messages", []) + + for msg in messages: + role = msg.get("role") + if role == "assistant" and msg.get("tool_calls"): + for tc in msg["tool_calls"]: + func = tc.get("function", {}) + items.append({ + "type": "function_call", + "name": func.get("name", ""), + "arguments": func.get("arguments", ""), + "call_id": tc.get("id", ""), + }) + elif role == "tool": + items.append({ + "type": "function_call_output", + "call_id": msg.get("tool_call_id", ""), + "output": msg.get("content", ""), + }) + + # Final assistant message + final = result.get("final_response", "") + if not final: + final = result.get("error", "(No response generated)") + + items.append({ + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": final, + } + ], + }) + return items + + # ------------------------------------------------------------------ + # Agent execution + # ------------------------------------------------------------------ + async def _run_agent( self, user_message: str, conversation_history: List[Dict[str, str]], ephemeral_system_prompt: Optional[str] = None, session_id: Optional[str] = None, - ) -> Dict[str, Any]: + ) -> tuple: """ Create an agent and run a conversation in a thread executor. - run_conversation() is synchronous, so we run it off the event loop. + Returns ``(result_dict, usage_dict)`` where *usage_dict* contains + ``input_tokens``, ``output_tokens`` and ``total_tokens``. """ loop = asyncio.get_event_loop() @@ -492,7 +616,12 @@ class APIServerAdapter(BasePlatformAdapter): user_message=user_message, conversation_history=conversation_history, ) - return result + usage = { + "input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0, + "output_tokens": getattr(agent, "session_completion_tokens", 0) or 0, + "total_tokens": getattr(agent, "session_total_tokens", 0) or 0, + } + return result, usage return await loop.run_in_executor(None, _run) @@ -507,11 +636,13 @@ class APIServerAdapter(BasePlatformAdapter): return False try: - self._app = web.Application() + self._app = web.Application(middlewares=[cors_middleware]) self._app.router.add_get("/health", self._handle_health) self._app.router.add_get("/v1/models", self._handle_models) self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions) self._app.router.add_post("/v1/responses", self._handle_responses) + self._app.router.add_get("/v1/responses/{response_id}", self._handle_get_response) + self._app.router.add_delete("/v1/responses/{response_id}", self._handle_delete_response) self._runner = web.AppRunner(self._app) await self._runner.setup() diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index 2e72f0b893..3e701dcadb 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -25,7 +25,9 @@ from gateway.config import GatewayConfig, Platform, PlatformConfig from gateway.platforms.api_server import ( APIServerAdapter, ResponseStore, + _CORS_HEADERS, check_api_server_requirements, + cors_middleware, ) @@ -88,6 +90,17 @@ class TestResponseStore: assert store.get("resp_1") == {"output": "v2"} assert len(store) == 1 + def test_delete_existing(self): + store = ResponseStore(max_size=10) + store.put("resp_1", {"output": "hello"}) + assert store.delete("resp_1") is True + assert store.get("resp_1") is None + assert len(store) == 0 + + def test_delete_missing(self): + store = ResponseStore(max_size=10) + assert store.delete("resp_missing") is False + # --------------------------------------------------------------------------- # Adapter initialization @@ -188,11 +201,13 @@ def _make_adapter(api_key: str = "") -> APIServerAdapter: def _create_app(adapter: APIServerAdapter) -> web.Application: """Create the aiohttp app from the adapter (without starting the full server).""" - app = web.Application() + app = web.Application(middlewares=[cors_middleware]) app.router.add_get("/health", adapter._handle_health) app.router.add_get("/v1/models", adapter._handle_models) app.router.add_post("/v1/chat/completions", adapter._handle_chat_completions) app.router.add_post("/v1/responses", adapter._handle_responses) + app.router.add_get("/v1/responses/{response_id}", adapter._handle_get_response) + app.router.add_delete("/v1/responses/{response_id}", adapter._handle_delete_response) return app @@ -333,7 +348,7 @@ class TestChatCompletionsEndpoint: app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp = await cli.post( "/v1/chat/completions", json={ @@ -365,7 +380,7 @@ class TestChatCompletionsEndpoint: app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp = await cli.post( "/v1/chat/completions", json={ @@ -391,7 +406,7 @@ class TestChatCompletionsEndpoint: app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp = await cli.post( "/v1/chat/completions", json={ @@ -469,7 +484,7 @@ class TestResponsesEndpoint: app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp = await cli.post( "/v1/responses", json={ @@ -496,7 +511,7 @@ class TestResponsesEndpoint: app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp = await cli.post( "/v1/responses", json={ @@ -522,7 +537,7 @@ class TestResponsesEndpoint: app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp = await cli.post( "/v1/responses", json={ @@ -549,7 +564,7 @@ class TestResponsesEndpoint: async with TestClient(TestServer(app)) as cli: # First request with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result_1 + mock_run.return_value = (mock_result_1, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp1 = await cli.post( "/v1/responses", json={"model": "hermes-agent", "input": "What is 1+1?"}, @@ -567,7 +582,7 @@ class TestResponsesEndpoint: } with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result_2 + mock_run.return_value = (mock_result_2, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp2 = await cli.post( "/v1/responses", json={ @@ -605,7 +620,7 @@ class TestResponsesEndpoint: app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp = await cli.post( "/v1/responses", json={ @@ -629,7 +644,7 @@ class TestResponsesEndpoint: async with TestClient(TestServer(app)) as cli: # First request with instructions with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp1 = await cli.post( "/v1/responses", json={ @@ -644,7 +659,7 @@ class TestResponsesEndpoint: # Second request without instructions with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp2 = await cli.post( "/v1/responses", json={ @@ -781,7 +796,7 @@ class TestMultipleSystemMessages: app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: - mock_run.return_value = mock_result + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) resp = await cli.post( "/v1/chat/completions", json={ @@ -814,3 +829,333 @@ class TestSendMethod: result = await adapter.send("chat1", "hello") assert result.success is False assert "HTTP request/response" in result.error + + +# --------------------------------------------------------------------------- +# GET /v1/responses/{response_id} +# --------------------------------------------------------------------------- + + +class TestGetResponse: + @pytest.mark.asyncio + async def test_get_stored_response(self, adapter): + """GET returns a previously stored response.""" + mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1} + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + # Create a response first + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = (mock_result, {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}) + resp = await cli.post( + "/v1/responses", + json={"model": "hermes-agent", "input": "Hi"}, + ) + + assert resp.status == 200 + data = await resp.json() + response_id = data["id"] + + # Now GET it + resp2 = await cli.get(f"/v1/responses/{response_id}") + assert resp2.status == 200 + data2 = await resp2.json() + assert data2["id"] == response_id + assert data2["object"] == "response" + assert data2["status"] == "completed" + + @pytest.mark.asyncio + async def test_get_not_found(self, adapter): + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + resp = await cli.get("/v1/responses/resp_nonexistent") + assert resp.status == 404 + + @pytest.mark.asyncio + async def test_get_requires_auth(self, auth_adapter): + app = _create_app(auth_adapter) + async with TestClient(TestServer(app)) as cli: + resp = await cli.get("/v1/responses/resp_any") + assert resp.status == 401 + + +# --------------------------------------------------------------------------- +# DELETE /v1/responses/{response_id} +# --------------------------------------------------------------------------- + + +class TestDeleteResponse: + @pytest.mark.asyncio + async def test_delete_stored_response(self, adapter): + """DELETE removes a stored response and returns confirmation.""" + mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1} + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) + resp = await cli.post( + "/v1/responses", + json={"model": "hermes-agent", "input": "Hi"}, + ) + + data = await resp.json() + response_id = data["id"] + + # Delete it + resp2 = await cli.delete(f"/v1/responses/{response_id}") + assert resp2.status == 200 + data2 = await resp2.json() + assert data2["id"] == response_id + assert data2["object"] == "response" + assert data2["deleted"] is True + + # Verify it's gone + resp3 = await cli.get(f"/v1/responses/{response_id}") + assert resp3.status == 404 + + @pytest.mark.asyncio + async def test_delete_not_found(self, adapter): + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + resp = await cli.delete("/v1/responses/resp_nonexistent") + assert resp.status == 404 + + @pytest.mark.asyncio + async def test_delete_requires_auth(self, auth_adapter): + app = _create_app(auth_adapter) + async with TestClient(TestServer(app)) as cli: + resp = await cli.delete("/v1/responses/resp_any") + assert resp.status == 401 + + +# --------------------------------------------------------------------------- +# Tool calls in output +# --------------------------------------------------------------------------- + + +class TestToolCallsInOutput: + @pytest.mark.asyncio + async def test_tool_calls_in_output(self, adapter): + """When agent returns tool calls, they appear as function_call items.""" + mock_result = { + "final_response": "The result is 42.", + "messages": [ + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_abc123", + "function": { + "name": "calculator", + "arguments": '{"expression": "6*7"}', + }, + } + ], + }, + { + "role": "tool", + "tool_call_id": "call_abc123", + "content": "42", + }, + { + "role": "assistant", + "content": "The result is 42.", + }, + ], + "api_calls": 2, + } + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) + resp = await cli.post( + "/v1/responses", + json={"model": "hermes-agent", "input": "What is 6*7?"}, + ) + + assert resp.status == 200 + data = await resp.json() + output = data["output"] + + # Should have: function_call, function_call_output, message + assert len(output) == 3 + assert output[0]["type"] == "function_call" + assert output[0]["name"] == "calculator" + assert output[0]["arguments"] == '{"expression": "6*7"}' + assert output[0]["call_id"] == "call_abc123" + assert output[1]["type"] == "function_call_output" + assert output[1]["call_id"] == "call_abc123" + assert output[1]["output"] == "42" + assert output[2]["type"] == "message" + assert output[2]["content"][0]["text"] == "The result is 42." + + @pytest.mark.asyncio + async def test_no_tool_calls_still_works(self, adapter): + """Without tool calls, output is just a message.""" + mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1} + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) + resp = await cli.post( + "/v1/responses", + json={"model": "hermes-agent", "input": "Hello"}, + ) + + assert resp.status == 200 + data = await resp.json() + assert len(data["output"]) == 1 + assert data["output"][0]["type"] == "message" + + +# --------------------------------------------------------------------------- +# Usage / token counting +# --------------------------------------------------------------------------- + + +class TestUsageCounting: + @pytest.mark.asyncio + async def test_responses_usage(self, adapter): + """Responses API returns real token counts.""" + mock_result = {"final_response": "Done", "messages": [], "api_calls": 1} + usage = {"input_tokens": 100, "output_tokens": 50, "total_tokens": 150} + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = (mock_result, usage) + resp = await cli.post( + "/v1/responses", + json={"model": "hermes-agent", "input": "Hi"}, + ) + + assert resp.status == 200 + data = await resp.json() + assert data["usage"]["input_tokens"] == 100 + assert data["usage"]["output_tokens"] == 50 + assert data["usage"]["total_tokens"] == 150 + + @pytest.mark.asyncio + async def test_chat_completions_usage(self, adapter): + """Chat completions returns real token counts.""" + mock_result = {"final_response": "Done", "messages": [], "api_calls": 1} + usage = {"input_tokens": 200, "output_tokens": 80, "total_tokens": 280} + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = (mock_result, usage) + resp = await cli.post( + "/v1/chat/completions", + json={ + "model": "hermes-agent", + "messages": [{"role": "user", "content": "Hi"}], + }, + ) + + assert resp.status == 200 + data = await resp.json() + assert data["usage"]["prompt_tokens"] == 200 + assert data["usage"]["completion_tokens"] == 80 + assert data["usage"]["total_tokens"] == 280 + + +# --------------------------------------------------------------------------- +# Truncation +# --------------------------------------------------------------------------- + + +class TestTruncation: + @pytest.mark.asyncio + async def test_truncation_auto_limits_history(self, adapter): + """With truncation=auto, history over 100 messages is trimmed.""" + mock_result = {"final_response": "OK", "messages": [], "api_calls": 1} + + # Pre-seed a stored response with a long history + long_history = [{"role": "user", "content": f"msg {i}"} for i in range(150)] + adapter._response_store.put("resp_prev", { + "response": {"id": "resp_prev", "object": "response"}, + "conversation_history": long_history, + "instructions": None, + }) + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) + resp = await cli.post( + "/v1/responses", + json={ + "model": "hermes-agent", + "input": "follow up", + "previous_response_id": "resp_prev", + "truncation": "auto", + }, + ) + + assert resp.status == 200 + call_kwargs = mock_run.call_args.kwargs + # History should be truncated to 100 + assert len(call_kwargs["conversation_history"]) <= 100 + + @pytest.mark.asyncio + async def test_no_truncation_keeps_full_history(self, adapter): + """Without truncation=auto, long history is passed as-is.""" + mock_result = {"final_response": "OK", "messages": [], "api_calls": 1} + + long_history = [{"role": "user", "content": f"msg {i}"} for i in range(150)] + adapter._response_store.put("resp_prev2", { + "response": {"id": "resp_prev2", "object": "response"}, + "conversation_history": long_history, + "instructions": None, + }) + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) + resp = await cli.post( + "/v1/responses", + json={ + "model": "hermes-agent", + "input": "follow up", + "previous_response_id": "resp_prev2", + }, + ) + + assert resp.status == 200 + call_kwargs = mock_run.call_args.kwargs + assert len(call_kwargs["conversation_history"]) == 150 + + +# --------------------------------------------------------------------------- +# CORS +# --------------------------------------------------------------------------- + + +class TestCORS: + @pytest.mark.asyncio + async def test_cors_headers_on_get(self, adapter): + """CORS headers present on normal responses.""" + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + resp = await cli.get("/health") + assert resp.status == 200 + assert resp.headers.get("Access-Control-Allow-Origin") == "*" + assert "POST" in resp.headers.get("Access-Control-Allow-Methods", "") + assert "DELETE" in resp.headers.get("Access-Control-Allow-Methods", "") + + @pytest.mark.asyncio + async def test_cors_options_preflight(self, adapter): + """OPTIONS preflight request returns CORS headers.""" + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + # OPTIONS to a known path — aiohttp will route through middleware + resp = await cli.options("/health") + assert resp.status == 200 + assert resp.headers.get("Access-Control-Allow-Origin") == "*" + assert "Authorization" in resp.headers.get("Access-Control-Allow-Headers", "")