From cd46f52d2332c78c8f85b321442159f07cc8d852 Mon Sep 17 00:00:00 2001 From: stemirkhan Date: Thu, 16 Apr 2026 20:43:18 +0300 Subject: [PATCH] fix(chat-completions): normalize provider thinking aliases --- app/core/openai/chat_requests.py | 2 + app/core/openai/requests.py | 80 ++++++++++++++++--- .../proposal.md | 16 ++++ .../specs/chat-completions-compat/spec.md | 19 +++++ .../specs/responses-api-compat/spec.md | 18 +++++ .../tasks.md | 14 ++++ .../test_openai_compat_features.py | 23 ++++++ tests/unit/test_chat_request_mapping.py | 32 ++++++++ tests/unit/test_openai_requests.py | 33 ++++++++ 9 files changed, 227 insertions(+), 10 deletions(-) create mode 100644 openspec/changes/normalize-provider-thinking-aliases/proposal.md create mode 100644 openspec/changes/normalize-provider-thinking-aliases/specs/chat-completions-compat/spec.md create mode 100644 openspec/changes/normalize-provider-thinking-aliases/specs/responses-api-compat/spec.md create mode 100644 openspec/changes/normalize-provider-thinking-aliases/tasks.md diff --git a/app/core/openai/chat_requests.py b/app/core/openai/chat_requests.py index afa0c94b5..c400b398d 100644 --- a/app/core/openai/chat_requests.py +++ b/app/core/openai/chat_requests.py @@ -11,6 +11,7 @@ ResponsesRequest, ResponsesTextControls, ResponsesTextFormat, + normalize_reasoning_aliases, normalize_tool_type, validate_tool_types, ) @@ -133,6 +134,7 @@ def to_responses_request(self) -> ResponsesRequest: reasoning_effort = data.pop("reasoning_effort", None) if reasoning_effort is not None and "reasoning" not in data: data["reasoning"] = {"effort": reasoning_effort} + normalize_reasoning_aliases(data) if response_format is not None: _apply_response_format(data, response_format) if isinstance(stream_options, Mapping): diff --git a/app/core/openai/requests.py b/app/core/openai/requests.py index 3f6bd7ba7..5cd02af69 100644 --- a/app/core/openai/requests.py +++ b/app/core/openai/requests.py @@ -522,18 +522,11 @@ def _sanitize_interleaved_reasoning_input(payload: MutableJsonObject) -> None: payload["input"] = _sanitize_input_items(input_items) -def _normalize_openai_compatible_aliases(payload: MutableJsonObject) -> None: +def normalize_reasoning_aliases(payload: MutableJsonObject) -> None: reasoning_effort = payload.pop("reasoningEffort", None) reasoning_summary = payload.pop("reasoningSummary", None) - text_verbosity = payload.pop("textVerbosity", None) - top_level_verbosity = payload.pop("verbosity", None) - prompt_cache_key = payload.pop("promptCacheKey", None) - prompt_cache_retention = payload.pop("promptCacheRetention", None) - - if isinstance(prompt_cache_key, str) and "prompt_cache_key" not in payload: - payload["prompt_cache_key"] = prompt_cache_key - if isinstance(prompt_cache_retention, str) and "prompt_cache_retention" not in payload: - payload["prompt_cache_retention"] = prompt_cache_retention + provider_thinking = payload.pop("thinking", None) + provider_enable_thinking = payload.pop("enable_thinking", None) reasoning_payload = _json_mapping_or_none(payload.get("reasoning")) if reasoning_payload is not None: @@ -545,9 +538,76 @@ def _normalize_openai_compatible_aliases(payload: MutableJsonObject) -> None: reasoning_map["effort"] = reasoning_effort if isinstance(reasoning_summary, str) and "summary" not in reasoning_map: reasoning_map["summary"] = reasoning_summary + + provider_reasoning = _normalize_thinking_alias( + provider_thinking, + enable_thinking=provider_enable_thinking, + ) + if provider_reasoning is not None: + if "effort" not in reasoning_map and "effort" in provider_reasoning: + reasoning_map["effort"] = provider_reasoning["effort"] + if "summary" not in reasoning_map and "summary" in provider_reasoning: + reasoning_map["summary"] = provider_reasoning["summary"] + if reasoning_map: payload["reasoning"] = reasoning_map + +def _normalize_thinking_alias( + thinking: JsonValue, + *, + enable_thinking: JsonValue, +) -> MutableJsonObject | None: + if isinstance(thinking, bool): + return {"effort": "medium"} if thinking else None + if isinstance(thinking, str): + normalized = thinking.strip().lower() + if normalized in {"low", "medium", "high", "xhigh"}: + return {"effort": normalized} + if normalized in {"enabled", "true", "on"}: + return {"effort": "medium"} + if normalized in {"disabled", "false", "off"}: + return None + thinking_mapping = _json_mapping_or_none(thinking) + if thinking_mapping is not None: + normalized: MutableJsonObject = {} + effort = thinking_mapping.get("effort") + summary = thinking_mapping.get("summary") + if isinstance(effort, str) and effort.strip(): + normalized["effort"] = effort.strip().lower() + if isinstance(summary, str) and summary.strip(): + normalized["summary"] = summary.strip() + if normalized: + return normalized + thinking_type = thinking_mapping.get("type") + if isinstance(thinking_type, str): + normalized_type = thinking_type.strip().lower() + if normalized_type == "enabled": + return {"effort": "medium"} + if normalized_type == "disabled": + return None + enabled = thinking_mapping.get("enabled") + if isinstance(enabled, bool): + return {"effort": "medium"} if enabled else None + + if isinstance(enable_thinking, bool): + return {"effort": "medium"} if enable_thinking else None + return None + + +def _normalize_openai_compatible_aliases(payload: MutableJsonObject) -> None: + text_verbosity = payload.pop("textVerbosity", None) + top_level_verbosity = payload.pop("verbosity", None) + prompt_cache_key = payload.pop("promptCacheKey", None) + prompt_cache_retention = payload.pop("promptCacheRetention", None) + + if isinstance(prompt_cache_key, str) and "prompt_cache_key" not in payload: + payload["prompt_cache_key"] = prompt_cache_key + if isinstance(prompt_cache_retention, str) and "prompt_cache_retention" not in payload: + payload["prompt_cache_retention"] = prompt_cache_retention + + normalize_reasoning_aliases(payload) + text_payload = _json_mapping_or_none(payload.get("text")) if text_payload is not None: text_map: MutableJsonObject = dict(text_payload.items()) diff --git a/openspec/changes/normalize-provider-thinking-aliases/proposal.md b/openspec/changes/normalize-provider-thinking-aliases/proposal.md new file mode 100644 index 000000000..244a8a75f --- /dev/null +++ b/openspec/changes/normalize-provider-thinking-aliases/proposal.md @@ -0,0 +1,16 @@ +## Why + +Some OpenAI-compatible clients reuse provider-specific reasoning controls when pointed at `codex-lb`. In particular, Qwen/DeepSeek-style `enable_thinking` and Anthropic-style `thinking` fields can leak through the Chat Completions and Responses compatibility layers and reach the upstream ChatGPT backend unchanged, which causes avoidable upstream validation failures. + +## What Changes + +- Normalize provider-specific thinking aliases into the existing `reasoning` payload before upstream forwarding. +- Apply that normalization to Chat Completions request mapping and to the shared OpenAI-compatible Responses payload sanitation path. +- Drop the original provider-specific alias fields from forwarded upstream payloads. + +## Capabilities + +### Modified Capabilities + +- `chat-completions-compat` +- `responses-api-compat` diff --git a/openspec/changes/normalize-provider-thinking-aliases/specs/chat-completions-compat/spec.md b/openspec/changes/normalize-provider-thinking-aliases/specs/chat-completions-compat/spec.md new file mode 100644 index 000000000..da1254f66 --- /dev/null +++ b/openspec/changes/normalize-provider-thinking-aliases/specs/chat-completions-compat/spec.md @@ -0,0 +1,19 @@ +## ADDED Requirements + +### Requirement: Chat Completions normalizes provider-specific thinking aliases + +When Chat Completions clients send provider-specific reasoning controls that are commonly used by non-OpenAI SDKs, the service MUST normalize those controls into the internal Responses `reasoning` shape before forwarding upstream. The original provider-specific fields MUST NOT be forwarded upstream unchanged. + +#### Scenario: Qwen-style enable_thinking is normalized + +- **WHEN** a client calls `/v1/chat/completions` with `enable_thinking: true` +- **AND** no explicit `reasoning` or `reasoning_effort` override is present +- **THEN** the mapped Responses payload includes `reasoning.effort: "medium"` +- **AND** the forwarded upstream payload does not include `enable_thinking` + +#### Scenario: Anthropic-style thinking object is normalized + +- **WHEN** a client calls `/v1/chat/completions` with `thinking: {"type":"enabled","budget_tokens":2048}` +- **AND** no explicit `reasoning` or `reasoning_effort` override is present +- **THEN** the mapped Responses payload includes `reasoning.effort: "medium"` +- **AND** the forwarded upstream payload does not include `thinking` diff --git a/openspec/changes/normalize-provider-thinking-aliases/specs/responses-api-compat/spec.md b/openspec/changes/normalize-provider-thinking-aliases/specs/responses-api-compat/spec.md new file mode 100644 index 000000000..40ac4b6c8 --- /dev/null +++ b/openspec/changes/normalize-provider-thinking-aliases/specs/responses-api-compat/spec.md @@ -0,0 +1,18 @@ +## ADDED Requirements + +### Requirement: OpenAI-compatible Responses payload sanitation removes provider-specific thinking aliases + +The shared OpenAI-compatible Responses sanitation path MUST normalize third-party thinking aliases into the canonical `reasoning` object before upstream forwarding. Unknown provider-specific thinking controls MUST NOT be passed through unchanged to the upstream ChatGPT backend. + +#### Scenario: Shared payload sanitation maps enable_thinking + +- **WHEN** an internal Responses payload contains `enable_thinking: true` +- **AND** no explicit `reasoning.effort` is already present +- **THEN** the forwarded upstream payload includes `reasoning.effort: "medium"` +- **AND** the forwarded upstream payload does not include `enable_thinking` + +#### Scenario: Explicit reasoning wins over provider aliases + +- **WHEN** an internal Responses payload contains both `reasoning: {"effort":"high"}` and `thinking: {"type":"enabled"}` +- **THEN** the forwarded upstream payload keeps `reasoning.effort: "high"` +- **AND** the forwarded upstream payload does not include `thinking` diff --git a/openspec/changes/normalize-provider-thinking-aliases/tasks.md b/openspec/changes/normalize-provider-thinking-aliases/tasks.md new file mode 100644 index 000000000..758d0aae9 --- /dev/null +++ b/openspec/changes/normalize-provider-thinking-aliases/tasks.md @@ -0,0 +1,14 @@ +## 1. Specs + +- [x] 1.1 Add compatibility requirements for provider-specific thinking alias normalization. +- [x] 1.2 Validate OpenSpec changes. + +## 2. Tests + +- [x] 2.1 Add unit coverage for Chat Completions and shared Responses alias normalization. +- [x] 2.2 Add integration coverage for `/v1/chat/completions` with `enable_thinking`. + +## 3. Implementation + +- [x] 3.1 Normalize `thinking` / `enable_thinking` into `reasoning` before upstream forwarding. +- [x] 3.2 Ensure provider-specific alias fields are removed from forwarded upstream payloads. diff --git a/tests/integration/test_openai_compat_features.py b/tests/integration/test_openai_compat_features.py index 6ceace261..5eaefac10 100644 --- a/tests/integration/test_openai_compat_features.py +++ b/tests/integration/test_openai_compat_features.py @@ -646,6 +646,29 @@ async def fake_stream(payload, headers, access_token, account_id, base_url=None, assert seen["payload"].reasoning.effort == "low" +@pytest.mark.asyncio +async def test_v1_chat_completions_normalizes_enable_thinking(async_client, monkeypatch): + await _import_account(async_client, "acc_chat_enable_thinking", "chat-enable-thinking@example.com") + + seen = {} + + async def fake_stream(payload, headers, access_token, account_id, base_url=None, raise_for_status=False): + seen["payload"] = payload.to_payload() + yield _completed_event("resp_chat_enable_thinking") + + monkeypatch.setattr(proxy_module, "core_stream_responses", fake_stream) + + payload = { + "model": "gpt-5.2", + "messages": [{"role": "user", "content": "Think."}], + "enable_thinking": True, + } + resp = await async_client.post("/v1/chat/completions", json=payload) + assert resp.status_code == 200 + assert seen["payload"]["reasoning"] == {"effort": "medium"} + assert "enable_thinking" not in seen["payload"] + + @pytest.mark.asyncio async def test_v1_chat_completions_forwards_service_tier(async_client, monkeypatch): await _import_account(async_client, "acc_chat_service_tier", "chat-service-tier@example.com") diff --git a/tests/unit/test_chat_request_mapping.py b/tests/unit/test_chat_request_mapping.py index 10f52b9cb..c0a23e304 100644 --- a/tests/unit/test_chat_request_mapping.py +++ b/tests/unit/test_chat_request_mapping.py @@ -123,6 +123,38 @@ def test_chat_reasoning_effort_maps_to_responses_reasoning(): assert reasoning_map.get("effort") == "high" +def test_chat_enable_thinking_maps_to_default_reasoning_effort(): + payload = { + "model": "gpt-5.2", + "messages": [{"role": "user", "content": "hi"}], + "enable_thinking": True, + } + req = ChatCompletionsRequest.model_validate(payload) + responses = req.to_responses_request() + dumped = responses.to_payload() + assert "enable_thinking" not in dumped + reasoning = dumped.get("reasoning") + assert isinstance(reasoning, Mapping) + reasoning_map = cast(Mapping[str, JsonValue], reasoning) + assert reasoning_map.get("effort") == "medium" + + +def test_chat_anthropic_thinking_alias_maps_to_default_reasoning_effort(): + payload = { + "model": "gpt-5.2", + "messages": [{"role": "user", "content": "hi"}], + "thinking": {"type": "enabled", "budget_tokens": 2048}, + } + req = ChatCompletionsRequest.model_validate(payload) + responses = req.to_responses_request() + dumped = responses.to_payload() + assert "thinking" not in dumped + reasoning = dumped.get("reasoning") + assert isinstance(reasoning, Mapping) + reasoning_map = cast(Mapping[str, JsonValue], reasoning) + assert reasoning_map.get("effort") == "medium" + + def test_chat_service_tier_is_preserved_in_responses_payload(): payload = { "model": "gpt-5.2", diff --git a/tests/unit/test_openai_requests.py b/tests/unit/test_openai_requests.py index 95c3db1ba..7c83d0ce2 100644 --- a/tests/unit/test_openai_requests.py +++ b/tests/unit/test_openai_requests.py @@ -216,6 +216,39 @@ def test_openai_compatible_reasoning_aliases_are_normalized(): assert "reasoningSummary" not in dumped +def test_provider_thinking_aliases_are_normalized(): + payload = { + "model": "gpt-5.1", + "instructions": "hi", + "input": [], + "thinking": {"type": "enabled", "budget_tokens": 2048}, + "enable_thinking": True, + } + request = ResponsesRequest.model_validate(payload) + + dumped = request.to_payload() + assert dumped["reasoning"] == {"effort": "medium"} + assert "thinking" not in dumped + assert "enable_thinking" not in dumped + + +def test_explicit_reasoning_wins_over_provider_thinking_aliases(): + payload = { + "model": "gpt-5.1", + "instructions": "hi", + "input": [], + "reasoning": {"effort": "high"}, + "thinking": {"type": "enabled"}, + "enable_thinking": True, + } + request = ResponsesRequest.model_validate(payload) + + dumped = request.to_payload() + assert dumped["reasoning"] == {"effort": "high"} + assert "thinking" not in dumped + assert "enable_thinking" not in dumped + + def test_openai_compatible_text_verbosity_alias_is_normalized(): payload = { "model": "gpt-5.1",