Merge pull request #2091 from dusterbloom/fix/lmstudio-context-length-detection

feat: query local servers for actual context window size
This commit is contained in:
Teknium 2026-03-19 19:08:21 -07:00 committed by GitHub
commit 3a9a1bbb84
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 742 additions and 9 deletions

View file

@@ -6569,7 +6569,21 @@ class AIAgent:
self._response_was_previewed = True
break
# No fallback -- append the empty message as-is
# No fallback -- if reasoning_text exists, the model put its
# entire response inside <think> tags; use that as the content.
if reasoning_text:
self._vprint(f"{self.log_prefix}Using reasoning as response content (model wrapped entire response in think tags).", force=True)
final_response = reasoning_text
empty_msg = {
"role": "assistant",
"content": final_response,
"reasoning": reasoning_text,
"finish_reason": finish_reason,
}
messages.append(empty_msg)
break
# Truly empty -- no reasoning and no content
empty_msg = {
"role": "assistant",
"content": final_response,
@@ -6577,10 +6591,10 @@ class AIAgent:
"finish_reason": finish_reason,
}
messages.append(empty_msg)
self._cleanup_task_resources(effective_task_id)
self._persist_session(messages, conversation_history)
return {
"final_response": final_response or None,
"messages": messages,