feat(api): implement dynamic max tokens handling for various providers

- Added a `_max_tokens_param` method to AIAgent that returns the appropriate max-tokens parameter for the active provider (OpenAI vs. others).
- Updated API calls in AIAgent to utilize the new max tokens handling.
- Introduced auxiliary_max_tokens_param function in auxiliary_client for consistent max tokens management across auxiliary clients.
- Refactored multiple tools to use auxiliary_max_tokens_param for improved compatibility with different models and providers.
This commit is contained in:
teknium1 2026-02-26 20:23:56 -08:00
parent f0458ebdb8
commit 58fce0a37b
7 changed files with 67 additions and 20 deletions

View file

@@ -812,10 +812,11 @@ def _extract_relevant_content(
)
try:
+from agent.auxiliary_client import auxiliary_max_tokens_param
response = _aux_vision_client.chat.completions.create(
model=EXTRACTION_MODEL,
messages=[{"role": "user", "content": extraction_prompt}],
-max_tokens=4000,
+**auxiliary_max_tokens_param(4000),
temperature=0.1,
)
return response.choices[0].message.content
@@ -1283,6 +1284,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
)
# Use the sync auxiliary vision client directly
+from agent.auxiliary_client import auxiliary_max_tokens_param
response = _aux_vision_client.chat.completions.create(
model=EXTRACTION_MODEL,
messages=[
@@ -1294,7 +1296,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
],
}
],
-max_tokens=2000,
+**auxiliary_max_tokens_param(2000),
temperature=0.1,
)