diff --git a/app/core/clients/proxy.py b/app/core/clients/proxy.py index a60eeab2e..6a8a66cdf 100644 --- a/app/core/clients/proxy.py +++ b/app/core/clients/proxy.py @@ -944,18 +944,34 @@ def _is_native_codex_originator(originator: str | None) -> bool: return stripped in _NATIVE_CODEX_ORIGINATORS +def _payload_uses_image_generation_tool(payload: Mapping[str, JsonValue]) -> bool: + tools = payload.get("tools") + if not isinstance(tools, list): + return False + for tool in tools: + if not isinstance(tool, dict): + continue + tool_type = tool.get("type") + if tool_type == "image_generation": + return True + return False + + def _resolve_stream_transport( *, transport: str, transport_override: str | None, model: str | None, headers: Mapping[str, str], + has_image_generation_tool: bool = False, ) -> str: configured = _configured_stream_transport(transport=transport, transport_override=transport_override) if configured == "websocket": return "websocket" if configured == "http": return "http" + if has_image_generation_tool: + return "http" if _has_native_codex_transport_headers(headers): return "websocket" @@ -1598,6 +1614,7 @@ async def stream_responses( transport_override=upstream_stream_transport_override, model=payload.model, headers=headers, + has_image_generation_tool=_payload_uses_image_generation_tool(payload_dict), ) if transport == "websocket": upstream_headers = _build_upstream_websocket_headers(headers, access_token, account_id) diff --git a/app/core/config/settings.py b/app/core/config/settings.py index a80fc7ffc..c02d639dd 100644 --- a/app/core/config/settings.py +++ b/app/core/config/settings.py @@ -135,7 +135,11 @@ class Settings(BaseSettings): compact_request_budget_seconds: float = Field(default=75.0, gt=0) stream_idle_timeout_seconds: float = 300.0 proxy_downstream_websocket_idle_timeout_seconds: float = Field(default=120.0, gt=0) - max_sse_event_bytes: int = Field(default=2 * 1024 * 1024, gt=0) + # Applies to both upstream SSE event buffering and 
upstream websocket message + # frames. Keep the default aligned with the common 16 MiB websocket ceiling so + # large built-in tool payloads (for example image_generation outputs) do not + # fail locally with a 1009 before upstream completion. + max_sse_event_bytes: int = Field(default=16 * 1024 * 1024, gt=0) auth_base_url: str = "https://auth.openai.com" oauth_client_id: str = "app_EMoamEEZ73f0CkXaXp7hrann" oauth_originator: str = "codex_chatgpt_desktop" diff --git a/app/core/openai/requests.py b/app/core/openai/requests.py index 3f6bd7ba7..7cfef32b2 100644 --- a/app/core/openai/requests.py +++ b/app/core/openai/requests.py @@ -69,7 +69,7 @@ def normalize_tool_choice(choice: JsonValue | None) -> JsonValue | None: return choice -def validate_tool_types(tools: list[JsonValue]) -> list[JsonValue]: +def validate_tool_types(tools: list[JsonValue], *, allow_builtin_tools: bool = False) -> list[JsonValue]: normalized_tools: list[JsonValue] = [] for tool in tools: if not is_json_mapping(tool): @@ -83,7 +83,7 @@ def validate_tool_types(tools: list[JsonValue]) -> list[JsonValue]: tool = dict(tool_mapping) tool["type"] = normalized_type tool_type = normalized_type - if tool_type in UNSUPPORTED_TOOL_TYPES: + if not allow_builtin_tools and tool_type in UNSUPPORTED_TOOL_TYPES: raise ValueError(f"Unsupported tool type: {tool_type}") normalized_tools.append(tool) return normalized_tools @@ -379,7 +379,7 @@ def _normalize_previous_response_id(cls, value: str | None) -> str | None: @field_validator("tools") @classmethod def _validate_tools(cls, value: list[JsonValue]) -> list[JsonValue]: - return validate_tool_types(value) + return validate_tool_types(value, allow_builtin_tools=True) @field_validator("tool_choice") @classmethod @@ -511,6 +511,9 @@ def _sort_keys_recursive(value: JsonValue) -> JsonValue: def _strip_compact_unsupported_fields(payload: MutableJsonObject) -> MutableJsonObject: payload = _strip_unsupported_fields(payload) payload.pop("store", None) + 
payload.pop("tools", None) + payload.pop("tool_choice", None) + payload.pop("parallel_tool_calls", None) return payload diff --git a/app/core/openai/v1_requests.py b/app/core/openai/v1_requests.py index 19974290f..a430ac638 100644 --- a/app/core/openai/v1_requests.py +++ b/app/core/openai/v1_requests.py @@ -54,7 +54,7 @@ def _ensure_store_false(cls, value: bool | None) -> bool | None: @field_validator("tools") @classmethod def _validate_tools(cls, value: list[JsonValue]) -> list[JsonValue]: - return validate_tool_types(value) + return validate_tool_types(value, allow_builtin_tools=True) @model_validator(mode="after") def _validate_input(self) -> "V1ResponsesRequest": diff --git a/app/modules/proxy/load_balancer.py b/app/modules/proxy/load_balancer.py index c2cabd0a1..291e96362 100644 --- a/app/modules/proxy/load_balancer.py +++ b/app/modules/proxy/load_balancer.py @@ -170,10 +170,11 @@ async def load_selection_inputs() -> _SelectionInputs: runtime=self._runtime, ) - result = select_account( + result = _select_account_preferring_budget_safe( states, prefer_earlier_reset=prefer_earlier_reset_accounts, routing_strategy=routing_strategy, + budget_threshold_pct=budget_threshold_pct, ) selected_account_map = account_map @@ -645,10 +646,11 @@ async def _select_with_stickiness( sticky_repo: StickySessionsRepository | None, ) -> SelectionResult: if not sticky_key or not sticky_repo: - return select_account( + return _select_account_preferring_budget_safe( states, prefer_earlier_reset=prefer_earlier_reset_accounts, routing_strategy=routing_strategy, + budget_threshold_pct=budget_threshold_pct, ) if sticky_kind is None: raise ValueError("sticky_kind is required when sticky_key is provided") @@ -670,14 +672,16 @@ async def _select_with_stickiness( if existing: pinned = next((state for state in states if state.account_id == existing), None) if pinned is not None: - # Check if pinned account has insufficient budget (< 5% remaining) - # or rate limit is far away (reset_at more 
than 10 minutes away) + # Proactively rebind session affinity for prompt-cache and + # codex sessions once the pinned account is already above the + # configured budget threshold. That preserves continuity below + # the threshold while avoiding obvious short-window failures + # once the session is skating on the edge of exhaustion. now = time.time() - budget_exhausted = ( - sticky_kind == StickySessionKind.PROMPT_CACHE + budget_pressured = ( + sticky_kind in (StickySessionKind.PROMPT_CACHE, StickySessionKind.CODEX_SESSION) and pinned.status != AccountStatus.RATE_LIMITED - and pinned.used_percent is not None - and pinned.used_percent > budget_threshold_pct + and _state_above_budget_threshold(pinned, budget_threshold_pct) ) rate_limit_far_away = ( sticky_kind == StickySessionKind.PROMPT_CACHE @@ -685,7 +689,7 @@ async def _select_with_stickiness( and pinned.reset_at is not None and pinned.reset_at - now >= 600 # 10 minutes ) - if not (budget_exhausted or rate_limit_far_away): + if not (budget_pressured or rate_limit_far_away): pinned_result = select_account( [pinned], prefer_earlier_reset=prefer_earlier_reset_accounts, @@ -702,19 +706,17 @@ async def _select_with_stickiness( # is above the budget threshold, reallocating just # wastes DB writes and destroys prompt-cache locality # (thrashing). 
- if budget_exhausted: - pool_best = select_account( + if budget_pressured: + pool_best = _select_account_preferring_budget_safe( states, prefer_earlier_reset=prefer_earlier_reset_accounts, routing_strategy=routing_strategy, deterministic_probe=True, + budget_threshold_pct=budget_threshold_pct, ) pool_also_exhausted = pool_best.account is not None and ( pool_best.account.account_id == pinned.account_id - or ( - pool_best.account.used_percent is not None - and pool_best.account.used_percent > budget_threshold_pct - ) + or _state_above_budget_threshold(pool_best.account, budget_threshold_pct) ) if pool_also_exhausted: pinned_result = select_account( @@ -769,10 +771,11 @@ async def _select_with_stickiness( else: await sticky_repo.delete(sticky_key, kind=sticky_kind) - chosen = select_account( + chosen = _select_account_preferring_budget_safe( states, prefer_earlier_reset=prefer_earlier_reset_accounts, routing_strategy=routing_strategy, + budget_threshold_pct=budget_threshold_pct, ) if persist_fallback and chosen.account is not None and chosen.account.account_id in account_map: await sticky_repo.upsert(sticky_key, chosen.account.account_id, kind=sticky_kind) @@ -1295,6 +1298,43 @@ def _additional_usage_is_exhausted(entry: AdditionalUsageHistory) -> bool: return float(entry.used_percent) >= 100.0 +def _state_above_budget_threshold(state: AccountState, budget_threshold_pct: float) -> bool: + return any( + used_percent is not None and used_percent > budget_threshold_pct + for used_percent in (state.used_percent, state.secondary_used_percent) + ) + + +def _select_account_preferring_budget_safe( + states: Iterable[AccountState], + *, + prefer_earlier_reset: bool, + routing_strategy: RoutingStrategy, + budget_threshold_pct: float, + allow_backoff_fallback: bool = True, + deterministic_probe: bool = False, +) -> SelectionResult: + state_list = list(states) + preferred_states = [state for state in state_list if not _state_above_budget_threshold(state, budget_threshold_pct)] + 
if preferred_states and len(preferred_states) != len(state_list): + preferred = select_account( + preferred_states, + prefer_earlier_reset=prefer_earlier_reset, + routing_strategy=routing_strategy, + allow_backoff_fallback=allow_backoff_fallback, + deterministic_probe=deterministic_probe, + ) + if preferred.account is not None: + return preferred + return select_account( + state_list, + prefer_earlier_reset=prefer_earlier_reset, + routing_strategy=routing_strategy, + allow_backoff_fallback=allow_backoff_fallback, + deterministic_probe=deterministic_probe, + ) + + def _is_upstream_circuit_breaker_open() -> bool: settings = get_settings() if not getattr(settings, "circuit_breaker_enabled", False): diff --git a/openspec/changes/raise-upstream-event-size-limit/proposal.md b/openspec/changes/raise-upstream-event-size-limit/proposal.md new file mode 100644 index 000000000..3ae864251 --- /dev/null +++ b/openspec/changes/raise-upstream-event-size-limit/proposal.md @@ -0,0 +1,15 @@ +# Proposal: raise-upstream-event-size-limit + +## Why + +Recent Codex Desktop builds can request built-in tools such as `image_generation`, which may produce large upstream Responses events. The proxy currently caps upstream SSE events and websocket message frames at 2 MiB, which is too low for legitimate image payloads and causes local websocket `1009 message too big` disconnects before `response.completed`. + +## What Changes + +- Raise the default upstream Responses event/message size limit from 2 MiB to 16 MiB. +- Keep the existing configuration knob (`max_sse_event_bytes`) so operators can still override the limit. + +## Impact + +- Prevents local `1009` disconnects for large but valid Responses tool outputs. +- Aligns the default limit with the common 16 MiB websocket ceiling already assumed by the proxy's `response.create` budget logic. 
diff --git a/openspec/changes/raise-upstream-event-size-limit/specs/responses-api-compat/spec.md b/openspec/changes/raise-upstream-event-size-limit/specs/responses-api-compat/spec.md new file mode 100644 index 000000000..dd6e57cb0 --- /dev/null +++ b/openspec/changes/raise-upstream-event-size-limit/specs/responses-api-compat/spec.md @@ -0,0 +1,7 @@ +## MODIFIED Requirements +### Requirement: Upstream Responses event size budget +The service SHALL allow upstream Responses SSE events and upstream websocket message frames up to 16 MiB by default before treating them as oversized. + +#### Scenario: built-in tool output exceeds the old 2 MiB limit +- **WHEN** upstream Responses traffic includes a single SSE event or websocket message frame larger than 2 MiB but not larger than 16 MiB +- **THEN** the proxy continues processing the event instead of closing the upstream websocket locally with `1009 message too big` diff --git a/openspec/changes/raise-upstream-event-size-limit/tasks.md b/openspec/changes/raise-upstream-event-size-limit/tasks.md new file mode 100644 index 000000000..666211ef5 --- /dev/null +++ b/openspec/changes/raise-upstream-event-size-limit/tasks.md @@ -0,0 +1,8 @@ +## 1. Implementation + +- [x] 1.1 Raise the default upstream Responses event/message size limit to 16 MiB. + +## 2. Verification + +- [x] 2.1 Add or update settings coverage for the new default. +- [x] 2.2 Run targeted pytest, ruff, and `openspec validate --specs`. diff --git a/openspec/changes/reallocate-codex-session-budget-pressure/proposal.md b/openspec/changes/reallocate-codex-session-budget-pressure/proposal.md new file mode 100644 index 000000000..420092f64 --- /dev/null +++ b/openspec/changes/reallocate-codex-session-budget-pressure/proposal.md @@ -0,0 +1,18 @@ +## Why + +Backend Codex routes use durable `codex_session` stickiness when the client sends a `session_id` header. 
Today that stickiness preserves continuity too aggressively: if the pinned account remains `ACTIVE` but its short-window usage is already above the sticky reallocation threshold, selection still keeps routing the session there until upstream starts returning `usage_limit_reached`. + +Separately, fresh selection still lets near-exhausted active accounts compete with budget-safe accounts until a hard failure occurs. In production this surfaces as repeated compact and websocket failures on accounts whose latest local primary-window usage is already in the `97-99%` range, even while other active accounts still have healthy short-window budget. + +## What Changes + +- extend proactive sticky reallocation to durable backend `codex_session` mappings when the pinned account is above the configured budget threshold and a healthier candidate exists +- prefer budget-safe Responses routing candidates over already-pressured candidates whenever at least one budget-safe candidate exists +- keep existing durable `codex_session` behavior below that threshold +- add regression coverage for backend Codex responses + compact routing with `session_id` + +## Impact + +- backend Codex sessions may rebind to a different account slightly earlier, before the pinned account hard-fails upstream on short-window budget exhaustion +- fresh Responses requests will prefer accounts that are still below the configured budget threshold instead of spending more attempts on near-exhausted accounts first +- OpenAI `/v1` routes still do not create durable `codex_session` mappings from `session_id`, but they do benefit from the same budget-safe fresh-selection preference diff --git a/openspec/changes/reallocate-codex-session-budget-pressure/specs/responses-api-compat/spec.md b/openspec/changes/reallocate-codex-session-budget-pressure/specs/responses-api-compat/spec.md new file mode 100644 index 000000000..5b45ddfe9 --- /dev/null +++ 
b/openspec/changes/reallocate-codex-session-budget-pressure/specs/responses-api-compat/spec.md @@ -0,0 +1,9 @@ +## MODIFIED Requirements +### Requirement: Responses routing prefers budget-safe accounts +When serving Responses routes, the service MUST prefer eligible accounts that are still below the configured budget threshold over eligible accounts already above that threshold. If no below-threshold candidate exists, the service MAY fall back to the pressured candidates. + +#### Scenario: Fresh Responses request avoids a near-exhausted account +- **WHEN** `/backend-api/codex/responses`, `/backend-api/codex/responses/compact`, `/v1/responses`, or `/v1/responses/compact` selects among multiple eligible active accounts +- **AND** one candidate is above the configured budget threshold +- **AND** another candidate remains below that threshold +- **THEN** the below-threshold candidate is chosen first diff --git a/openspec/changes/reallocate-codex-session-budget-pressure/specs/sticky-session-operations/spec.md b/openspec/changes/reallocate-codex-session-budget-pressure/specs/sticky-session-operations/spec.md new file mode 100644 index 000000000..671ba161c --- /dev/null +++ b/openspec/changes/reallocate-codex-session-budget-pressure/specs/sticky-session-operations/spec.md @@ -0,0 +1,13 @@ +## MODIFIED Requirements +### Requirement: Sticky sessions are explicitly typed +The system SHALL persist each sticky-session mapping with an explicit kind so durable Codex backend affinity, durable dashboard sticky-thread routing, and bounded prompt-cache affinity can be managed independently. 
+ +#### Scenario: Backend Codex session affinity is stored as durable +- **WHEN** a backend Codex request creates or refreshes stickiness from `session_id` +- **THEN** the stored mapping kind is `codex_session` + +#### Scenario: Backend Codex session rebinds under budget pressure +- **WHEN** a backend Codex request resolves an existing `codex_session` mapping +- **AND** the pinned account is above the configured sticky reallocation budget threshold +- **AND** another eligible account remains below that threshold +- **THEN** selection rebinds the durable `codex_session` mapping to the healthier account before sending the request upstream diff --git a/openspec/changes/reallocate-codex-session-budget-pressure/tasks.md b/openspec/changes/reallocate-codex-session-budget-pressure/tasks.md new file mode 100644 index 000000000..3ae48c705 --- /dev/null +++ b/openspec/changes/reallocate-codex-session-budget-pressure/tasks.md @@ -0,0 +1,11 @@ +## 1. Implementation + +- [x] 1.1 Update sticky selection so backend `codex_session` mappings reallocate when the pinned account is above the sticky budget threshold and a healthier candidate exists +- [x] 1.2 Preserve existing durable `codex_session` behavior when the pinned account is still below the threshold +- [x] 1.3 Prefer budget-safe Responses routing candidates over pressured candidates when any budget-safe option exists + +## 2. 
Verification + +- [x] 2.1 Add integration coverage for backend Codex `session_id` routing that proves reallocation above threshold +- [x] 2.2 Run the affected sticky-session integration tests +- [x] 2.3 Add targeted selection coverage for fresh routing that proves a budget-safe account wins over a pressured one diff --git a/openspec/changes/route-image-generation-over-http/.openspec.yaml b/openspec/changes/route-image-generation-over-http/.openspec.yaml new file mode 100644 index 000000000..863bff183 --- /dev/null +++ b/openspec/changes/route-image-generation-over-http/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-17 diff --git a/openspec/changes/route-image-generation-over-http/design.md b/openspec/changes/route-image-generation-over-http/design.md new file mode 100644 index 000000000..5e2b1394f --- /dev/null +++ b/openspec/changes/route-image-generation-over-http/design.md @@ -0,0 +1,63 @@ +## Context + +The proxy currently chooses upstream Responses transport in `app/core/clients/proxy.py`. In `auto` mode it prefers websocket for native Codex headers and websocket-preferred models such as `gpt-5.4`. That works well for normal text flows, but `image_generation` is different: OpenAI documents that the tool returns generated image data inline as base64, so large events are normal rather than pathological. + +The recent 16 MiB limit increase reduces failures, but it still applies one shared ceiling to both SSE event buffering and websocket message frames. Continuing to route image-generation traffic over websocket keeps the proxy exposed to avoidable large-frame failures and forces operators to keep raising a global limit for a narrow tool-specific behavior. + +## Goals / Non-Goals + +**Goals:** + +- Make auto upstream transport choose the safer HTTP/SSE path for Responses requests that include `image_generation`. +- Preserve existing operator controls and existing websocket preference logic for non-image-generation requests. 
+- Keep the change local to transport selection without altering request payload semantics. + +**Non-Goals:** + +- Do not mutate image-generation tool options such as `size`, `quality`, `format`, `compression`, or `partial_images`. +- Do not retry an already-started websocket image-generation request over HTTP. +- Do not change compact routing, which already strips tool fields before calling the upstream compact endpoint. + +## Decisions + +### Detect `image_generation` from the serialized Responses payload + +Use the already-materialized payload dictionary in `stream_responses()` to determine whether the request contains `tools[*].type == "image_generation"`. + +Why: + +- The payload is already canonicalized there, so the check is cheap and uses the same data that will go upstream. +- This avoids duplicating request-model-specific logic elsewhere in the proxy stack. + +Alternative considered: + +- Recompute the signal from the Pydantic request model in multiple call sites. Rejected because it spreads the policy across more than one layer. + +### Override only `auto` transport + +When `upstream_stream_transport` resolves to `auto`, `image_generation` forces upstream HTTP. Explicit `http` or `websocket` settings still take precedence. + +Why: + +- Operator overrides should stay authoritative. +- The issue is specifically that the default heuristic chooses websocket for requests whose payload shape makes HTTP safer. + +Alternative considered: + +- Force HTTP even when the operator explicitly configured websocket. Rejected because it breaks the existing transport-control contract. + +## Risks / Trade-offs + +- [Image-generation requests may lose websocket-specific latency benefits] → This is intentional; correctness and transport fit matter more than websocket preference for large image payloads. +- [Future built-in tools may have similar large-payload behavior] → Keep the rule narrow for now and extend only when real evidence justifies it. 
+- [Large HTTP/SSE events still rely on the configured byte ceiling] → The existing 16 MiB default remains in place as a separate safeguard. + +## Migration Plan + +- Deploy the transport-selection change without config migration. +- If operators explicitly want websocket for image generation despite the risk, they can still force `upstream_stream_transport=websocket`. +- Rollback is a code rollback only; there is no persisted state change. + +## Open Questions + +- None. diff --git a/openspec/changes/route-image-generation-over-http/proposal.md b/openspec/changes/route-image-generation-over-http/proposal.md new file mode 100644 index 000000000..6c4bafbdc --- /dev/null +++ b/openspec/changes/route-image-generation-over-http/proposal.md @@ -0,0 +1,25 @@ +## Why + +Responses `image_generation` calls can return large inline base64 image payloads. The current auto upstream transport logic still prefers websocket for websocket-preferred models such as `gpt-5.4`, which makes those legitimate image outputs hit the proxy's websocket frame ceiling and fail locally with `1009 message too big`. + +## What Changes + +- Modify auto upstream transport selection so Responses requests containing the built-in `image_generation` tool use upstream HTTP/SSE instead of upstream websocket. +- Keep explicit operator overrides intact: `upstream_stream_transport=http` and `upstream_stream_transport=websocket` continue to win over auto policy. +- Add regression coverage for transport resolution and end-to-end streaming path selection when `image_generation` is present. + +## Capabilities + +### New Capabilities + +None. + +### Modified Capabilities + +- `responses-api-compat`: auto upstream transport selection changes for Responses requests that include the `image_generation` built-in tool. + +## Impact + +- Affects upstream transport selection in [app/core/clients/proxy.py](app/core/clients/proxy.py).
+- Adds regression coverage in `tests/unit/test_proxy_utils.py`. +- Keeps compact request sanitization and explicit transport overrides unchanged. diff --git a/openspec/changes/route-image-generation-over-http/specs/responses-api-compat/spec.md b/openspec/changes/route-image-generation-over-http/specs/responses-api-compat/spec.md new file mode 100644 index 000000000..ddd35dc59 --- /dev/null +++ b/openspec/changes/route-image-generation-over-http/specs/responses-api-compat/spec.md @@ -0,0 +1,40 @@ +## MODIFIED Requirements + +### Requirement: Upstream Responses transport strategy +For streaming Codex/Responses proxy requests, the system MUST let operators choose the upstream transport strategy through dashboard settings. The resolved strategy MAY be `auto`, `http`, or `websocket`, and `default` MUST defer to the server configuration default. + +#### Scenario: Dashboard forces websocket upstream transport +- **WHEN** the dashboard setting `upstream_stream_transport` is set to `"websocket"` +- **THEN** streaming Responses requests use the upstream websocket transport + +#### Scenario: Dashboard forces HTTP upstream transport +- **WHEN** the dashboard setting `upstream_stream_transport` is set to `"http"` +- **THEN** streaming Responses requests use the upstream HTTP/SSE transport + +#### Scenario: Auto transport falls back when websocket upgrades are rejected +- **WHEN** the resolved upstream transport strategy is `"auto"` +- **AND** auto selection chose the websocket transport +- **AND** the upstream rejects the websocket upgrade with HTTP `426` +- **THEN** the proxy retries the request over the upstream HTTP/SSE transport + +#### Scenario: Session affinity alone does not trigger websocket upstream transport +- **WHEN** the resolved upstream transport strategy is `"auto"` +- **AND** a request includes a `session_id` +- **AND** it does not include an allowlisted native Codex `originator` or explicit Codex websocket feature headers +- **THEN** the auto strategy MUST 
keep using the existing model-preference transport selection rules + +#### Scenario: Auto transport honors websocket-preferred bootstrap models before registry warmup +- **WHEN** the resolved upstream transport strategy is `"auto"` +- **AND** the model registry has not loaded a snapshot yet +- **AND** the request targets a locally bootstrapped websocket-preferred model family such as `gpt-5.4` or `gpt-5.4-*` +- **AND** the request does not include the built-in `image_generation` tool +- **THEN** the proxy chooses the upstream websocket transport + +#### Scenario: Auto transport prefers HTTP for image-generation tool requests +- **WHEN** the resolved upstream transport strategy is `"auto"` +- **AND** the request includes a built-in `image_generation` tool +- **THEN** the proxy chooses the upstream HTTP/SSE transport even if the model would otherwise prefer websocket + +#### Scenario: Legacy settings preserve the pre-feature default +- **WHEN** transport selection runs against a legacy settings object that does not expose the newer upstream transport fields +- **THEN** the proxy MUST preserve the pre-feature HTTP transport default for model-preference auto-selection unless an explicit legacy websocket mode or native Codex websocket signal opts in diff --git a/openspec/changes/route-image-generation-over-http/tasks.md b/openspec/changes/route-image-generation-over-http/tasks.md new file mode 100644 index 000000000..ea90d1549 --- /dev/null +++ b/openspec/changes/route-image-generation-over-http/tasks.md @@ -0,0 +1,9 @@ +## 1. Transport policy + +- [x] 1.1 Add a Responses payload helper that detects the built-in `image_generation` tool. +- [x] 1.2 Update auto upstream transport selection to prefer HTTP when that helper matches, while preserving explicit transport overrides. + +## 2. Verification + +- [x] 2.1 Add regression coverage for transport resolution and stream path selection with `image_generation`. 
+- [x] 2.2 Run targeted pytest, ruff, and `openspec validate --specs`. diff --git a/openspec/changes/support-responses-builtin-tools/proposal.md b/openspec/changes/support-responses-builtin-tools/proposal.md new file mode 100644 index 000000000..dc22faf81 --- /dev/null +++ b/openspec/changes/support-responses-builtin-tools/proposal.md @@ -0,0 +1,17 @@ +# Proposal: support-responses-builtin-tools + +## Why + +Recent Codex Desktop builds now send newer built-in Responses tools such as `image_generation` and computer-use tool definitions. The current proxy still rejects those tool objects on full Responses routes and forwards them unchanged to the compact endpoint, which causes upstream `400 invalid_request_error` failures. + +## What Changes + +- Allow built-in Responses tools to pass through on `/backend-api/codex/responses` and `/v1/responses`. +- Keep Chat Completions compatibility behavior unchanged: only `web_search` remains supported there. +- Sanitize `/backend-api/codex/responses/compact` and `/v1/responses/compact` requests so tool-related fields are removed before the upstream compact call. + +## Impact + +- Restores compatibility with newer Codex Desktop request payloads. +- Reduces future breakage from new built-in Responses tool types on the full Responses path. +- Prevents compact requests from failing when desktop clients reuse full Responses payload shapes. diff --git a/openspec/changes/support-responses-builtin-tools/specs/responses-api-compat/spec.md b/openspec/changes/support-responses-builtin-tools/specs/responses-api-compat/spec.md new file mode 100644 index 000000000..f8c282cb4 --- /dev/null +++ b/openspec/changes/support-responses-builtin-tools/specs/responses-api-compat/spec.md @@ -0,0 +1,15 @@ +## MODIFIED Requirements +### Requirement: Responses-compatible tool payload handling +The service SHALL accept built-in Responses tool definitions on `/backend-api/codex/responses` and `/v1/responses` without locally rejecting them. 
The service MAY normalize documented aliases, but upstream model/tool compatibility validation MUST remain the upstream contract. + +#### Scenario: full Responses request includes built-in tools +- **WHEN** a client sends `/backend-api/codex/responses` or `/v1/responses` with built-in Responses tools such as `image_generation`, `computer_use`, `computer_use_preview`, `file_search`, or `code_interpreter` +- **THEN** the proxy forwards those tool objects upstream instead of returning a local `invalid_request_error` + +### Requirement: Compact requests drop tool-only fields +The service SHALL remove `tools`, `tool_choice`, and `parallel_tool_calls` from compact request payloads before calling the upstream compact endpoint. + +#### Scenario: compact request reuses a full Responses payload shape +- **WHEN** a client sends `/backend-api/codex/responses/compact` or `/v1/responses/compact` with `tools`, `tool_choice`, or `parallel_tool_calls` +- **THEN** the proxy drops those fields before the upstream compact request +- **AND** the compact request continues without a local or upstream `invalid_request_error` caused by `param="tools"` diff --git a/openspec/changes/support-responses-builtin-tools/tasks.md b/openspec/changes/support-responses-builtin-tools/tasks.md new file mode 100644 index 000000000..c8f48ae08 --- /dev/null +++ b/openspec/changes/support-responses-builtin-tools/tasks.md @@ -0,0 +1,10 @@ +## 1. Implementation + +- [x] 1.1 Allow built-in Responses tools to pass through request normalization for `/backend-api/codex/responses` and `/v1/responses`. +- [x] 1.2 Strip tool-related fields from compact request payloads before the upstream compact call. + +## 2. Verification + +- [x] 2.1 Update unit tests for Responses request normalization and compact sanitization. +- [x] 2.2 Update integration tests for `/v1/responses` tool passthrough and `/backend-api/codex/responses/compact` tool stripping. +- [x] 2.3 Run targeted pytest, ruff, and `openspec validate --specs`. 
diff --git a/tests/integration/test_openai_compat_features.py b/tests/integration/test_openai_compat_features.py index 6ceace261..7ff68d90c 100644 --- a/tests/integration/test_openai_compat_features.py +++ b/tests/integration/test_openai_compat_features.py @@ -137,7 +137,18 @@ async def fake_stream(payload, headers, access_token, account_id, base_url=None, {"type": "image_generation"}, ], ) -async def test_v1_responses_rejects_builtin_tools(async_client, tool_payload): +async def test_v1_responses_forwards_builtin_tools(async_client, monkeypatch, tool_payload): + await _import_account(async_client, "acc_builtin_tools", "builtin-tools@example.com") + + seen = {} + + async def fake_stream(payload, headers, access_token, account_id, base_url=None, raise_for_status=False): + del headers, access_token, account_id, base_url, raise_for_status + seen["payload"] = payload + yield _completed_event("resp_builtin_tools") + + monkeypatch.setattr(proxy_module, "core_stream_responses", fake_stream) + request_payload = { "model": "gpt-5.2", "input": [ @@ -150,8 +161,8 @@ async def test_v1_responses_rejects_builtin_tools(async_client, tool_payload): } resp = await async_client.post("/v1/responses", json=request_payload) - assert resp.status_code == 400 - assert resp.json()["error"]["type"] == "invalid_request_error" + assert resp.status_code == 200 + assert seen["payload"].tools == [tool_payload] @pytest.mark.asyncio diff --git a/tests/integration/test_proxy_compact.py b/tests/integration/test_proxy_compact.py index c7c39f708..870f1cbdf 100644 --- a/tests/integration/test_proxy_compact.py +++ b/tests/integration/test_proxy_compact.py @@ -101,6 +101,44 @@ async def test_proxy_compact_no_accounts(async_client): assert error["code"] == "no_accounts" +@pytest.mark.asyncio +async def test_proxy_compact_strips_tool_fields_before_upstream(async_client, monkeypatch): + email = "compact-tools@example.com" + raw_account_id = "acc_compact_tools" + auth_json = _make_auth_json(raw_account_id, 
email) + files = {"auth_json": ("auth.json", json.dumps(auth_json), "application/json")} + response = await async_client.post("/api/accounts/import", files=files) + assert response.status_code == 200 + + seen_payloads: list[dict[str, object]] = [] + + async def fake_compact(payload, headers, access_token, account_id): + del headers, access_token, account_id + seen_payloads.append(cast(dict[str, object], payload.to_payload())) + return CompactResponsePayload.model_validate({"object": "response.compaction", "output": []}) + + monkeypatch.setattr(proxy_module, "core_compact_responses", fake_compact) + + payload = { + "model": "gpt-5.1", + "instructions": "hi", + "input": [], + "tools": [{"type": "image_generation"}], + "tool_choice": {"type": "image_generation"}, + "parallel_tool_calls": True, + } + response = await async_client.post("/backend-api/codex/responses/compact", json=payload) + + assert response.status_code == 200 + assert len(seen_payloads) == 1 + assert seen_payloads[0]["model"] == "gpt-5.1" + assert seen_payloads[0]["instructions"] == "hi" + assert seen_payloads[0]["input"] == [] + assert "tools" not in seen_payloads[0] + assert "tool_choice" not in seen_payloads[0] + assert "parallel_tool_calls" not in seen_payloads[0] + + @pytest.mark.asyncio async def test_proxy_compact_surfaces_no_additional_quota_eligible_accounts(async_client): email = "compact-gated@example.com" diff --git a/tests/integration/test_proxy_sticky_sessions.py b/tests/integration/test_proxy_sticky_sessions.py index 9fc2818c9..50d9b4244 100644 --- a/tests/integration/test_proxy_sticky_sessions.py +++ b/tests/integration/test_proxy_sticky_sessions.py @@ -438,6 +438,93 @@ async def fake_compact(payload, headers, access_token, account_id): assert stream_seen == ["acc_sid_a", "acc_sid_a"] +@pytest.mark.asyncio +async def test_proxy_codex_session_id_reallocates_when_pinned_budget_exhausted(async_client, monkeypatch): + await _set_routing_settings(async_client, sticky_threads_enabled=False) + 
acc_a_id = await _import_account(async_client, "acc_sid_budget_a", "sid_budget_a@example.com") + acc_b_id = await _import_account(async_client, "acc_sid_budget_b", "sid_budget_b@example.com") + + now = utcnow() + now_epoch = int(now.replace(tzinfo=timezone.utc).timestamp()) + + async with SessionLocal() as session: + usage_repo = UsageRepository(session) + await usage_repo.add_entry( + account_id=acc_a_id, + used_percent=10.0, + window="primary", + reset_at=now_epoch + 3600, + window_minutes=300, + ) + await usage_repo.add_entry( + account_id=acc_b_id, + used_percent=20.0, + window="primary", + reset_at=now_epoch + 3600, + window_minutes=300, + ) + + stream_seen: list[str] = [] + + async def fake_stream(payload, headers, access_token, account_id, base_url=None, raise_for_status=False, **_kwargs): + stream_seen.append(account_id) + yield 'data: {"type":"response.completed","response":{"id":"resp_session_budget"}}\n\n' + + compact_seen: list[str] = [] + + async def fake_compact(payload, headers, access_token, account_id): + compact_seen.append(account_id) + return OpenAIResponsePayload.model_validate({"output": []}) + + monkeypatch.setattr(proxy_module, "core_stream_responses", fake_stream) + monkeypatch.setattr(proxy_module, "core_compact_responses", fake_compact) + + headers = {"session_id": "codex-thread-budget"} + stream_payload = { + "model": "gpt-5.1", + "instructions": "hi", + "input": [], + "stream": True, + } + response = await async_client.post("/backend-api/codex/responses", json=stream_payload, headers=headers) + assert response.status_code == 200 + assert stream_seen == ["acc_sid_budget_a"] + + async with SessionLocal() as session: + usage_repo = UsageRepository(session) + await usage_repo.add_entry( + account_id=acc_a_id, + used_percent=99.0, + window="primary", + reset_at=now_epoch + 3600, + window_minutes=300, + ) + await usage_repo.add_entry( + account_id=acc_b_id, + used_percent=5.0, + window="primary", + reset_at=now_epoch + 3600, + 
window_minutes=300, + ) + + compact_payload = { + "model": "gpt-5.1", + "instructions": "summarize", + "input": [{"role": "user", "content": [{"type": "input_text", "text": "hello"}]}], + } + response = await async_client.post( + "/backend-api/codex/responses/compact", + json=compact_payload, + headers=headers, + ) + assert response.status_code == 200 + assert compact_seen == ["acc_sid_budget_b"] + + response = await async_client.post("/backend-api/codex/responses", json=stream_payload, headers=headers) + assert response.status_code == 200 + assert stream_seen == ["acc_sid_budget_a", "acc_sid_budget_b"] + + @pytest.mark.asyncio async def test_proxy_codex_session_id_compact_first_pins_followup_stream_without_sticky_threads( async_client, diff --git a/tests/unit/test_openai_requests.py b/tests/unit/test_openai_requests.py index 95c3db1ba..e6e439559 100644 --- a/tests/unit/test_openai_requests.py +++ b/tests/unit/test_openai_requests.py @@ -370,6 +370,33 @@ def test_responses_accepts_builtin_tools(tool_type, expected): assert request.tools == [{"type": expected}] +@pytest.mark.parametrize( + "tool_payload", + [ + {"type": "image_generation"}, + { + "type": "computer_use_preview", + "display_width": 1024, + "display_height": 768, + "environment": "browser", + }, + {"type": "computer_use", "display_width": 1024, "display_height": 768, "environment": "browser"}, + {"type": "file_search", "vector_store_ids": ["vs_dummy"]}, + {"type": "code_interpreter", "container": {"type": "auto"}}, + ], +) +def test_responses_accepts_builtin_tool_passthrough(tool_payload): + payload = { + "model": "gpt-5.1", + "instructions": "hi", + "input": [], + "tools": [tool_payload], + } + request = ResponsesRequest.model_validate(payload) + + assert request.tools == [tool_payload] + + @pytest.mark.parametrize("tool_choice", [{"type": "web_search"}, {"type": "web_search_preview"}]) def test_responses_normalizes_tool_choice_web_search_preview(tool_choice): payload = { @@ -484,10 +511,59 @@ def 
test_v1_input_string_passthrough(): assert request.input == [{"role": "user", "content": [{"type": "input_text", "text": "hello"}]}] -def test_v1_rejects_builtin_tools(): - payload = {"model": "gpt-5.1", "input": [], "tools": [{"type": "image_generation"}]} - with pytest.raises(ValidationError, match="Unsupported tool type"): - V1ResponsesRequest.model_validate(payload) +@pytest.mark.parametrize( + "tool_payload", + [ + {"type": "image_generation"}, + { + "type": "computer_use_preview", + "display_width": 1024, + "display_height": 768, + "environment": "browser", + }, + {"type": "computer_use", "display_width": 1024, "display_height": 768, "environment": "browser"}, + {"type": "file_search", "vector_store_ids": ["vs_dummy"]}, + {"type": "code_interpreter", "container": {"type": "auto"}}, + ], +) +def test_v1_responses_accepts_builtin_tools(tool_payload): + payload = {"model": "gpt-5.1", "input": [], "tools": [tool_payload]} + request = V1ResponsesRequest.model_validate(payload).to_responses_request() + + assert request.tools == [tool_payload] + + +def test_compact_strips_tool_fields(): + payload = { + "model": "gpt-5.1", + "instructions": "hi", + "input": [], + "tools": [{"type": "image_generation"}], + "tool_choice": {"type": "image_generation"}, + "parallel_tool_calls": True, + } + request = ResponsesCompactRequest.model_validate(payload) + + dumped = request.to_payload() + assert "tools" not in dumped + assert "tool_choice" not in dumped + assert "parallel_tool_calls" not in dumped + + +def test_v1_compact_strips_tool_fields(): + payload = { + "model": "gpt-5.1", + "input": "hello", + "tools": [{"type": "image_generation"}], + "tool_choice": {"type": "image_generation"}, + "parallel_tool_calls": True, + } + request = V1ResponsesCompactRequest.model_validate(payload).to_compact_request() + + dumped = request.to_payload() + assert "tools" not in dumped + assert "tool_choice" not in dumped + assert "parallel_tool_calls" not in dumped def 
test_v1_compact_messages_convert(): diff --git a/tests/unit/test_proxy_load_balancer_refresh.py b/tests/unit/test_proxy_load_balancer_refresh.py index 64284132f..46eeede6e 100644 --- a/tests/unit/test_proxy_load_balancer_refresh.py +++ b/tests/unit/test_proxy_load_balancer_refresh.py @@ -311,6 +311,68 @@ async def test_select_account_reads_cached_usage_once_per_window() -> None: assert usage_repo.secondary_calls == 1 +@pytest.mark.asyncio +async def test_select_account_prefers_budget_safe_account_when_any_exist() -> None: + safe_account = _make_account("acc-safe", "safe@example.com") + pressured_account = _make_account("acc-pressured", "pressured@example.com") + now = utcnow() + now_epoch = int(now.replace(tzinfo=timezone.utc).timestamp()) + + primary = { + safe_account.id: UsageHistory( + id=1, + account_id=safe_account.id, + recorded_at=now, + window="primary", + used_percent=10.0, + reset_at=now_epoch + 300, + window_minutes=5, + ), + pressured_account.id: UsageHistory( + id=2, + account_id=pressured_account.id, + recorded_at=now, + window="primary", + used_percent=99.0, + reset_at=now_epoch + 300, + window_minutes=5, + ), + } + secondary = { + safe_account.id: UsageHistory( + id=3, + account_id=safe_account.id, + recorded_at=now, + window="secondary", + used_percent=80.0, + reset_at=now_epoch + 3600, + window_minutes=60, + ), + pressured_account.id: UsageHistory( + id=4, + account_id=pressured_account.id, + recorded_at=now, + window="secondary", + used_percent=5.0, + reset_at=now_epoch + 3600, + window_minutes=60, + ), + } + + accounts_repo = StubAccountsRepository([safe_account, pressured_account]) + usage_repo = StubUsageRepository(primary=primary, secondary=secondary) + sticky_repo = StubStickySessionsRepository() + + balancer = LoadBalancer(lambda: _repo_factory(accounts_repo, usage_repo, sticky_repo)) + selection = await balancer.select_account( + routing_strategy="usage_weighted", + budget_threshold_pct=95.0, + ) + + assert selection.account is not None + 
assert selection.account.id == safe_account.id + + @pytest.mark.asyncio async def test_select_account_filters_to_assigned_account_ids() -> None: preferred = _make_account("acc-preferred", "preferred@example.com") diff --git a/tests/unit/test_proxy_utils.py b/tests/unit/test_proxy_utils.py index 68bae47b9..bfaca3d56 100644 --- a/tests/unit/test_proxy_utils.py +++ b/tests/unit/test_proxy_utils.py @@ -275,6 +275,42 @@ def test_resolve_stream_transport_does_not_force_websocket_for_custom_codex_orig assert transport == "http" +def test_resolve_stream_transport_prefers_http_for_image_generation_even_with_native_codex_headers(monkeypatch) -> None: + monkeypatch.setattr( + proxy_module, + "get_model_registry", + lambda: SimpleNamespace(prefers_websockets=lambda model: model == "gpt-5.4"), + ) + + transport = proxy_module._resolve_stream_transport( + transport="auto", + transport_override=None, + model="gpt-5.4", + headers={"originator": "codex_chatgpt_desktop"}, + has_image_generation_tool=True, + ) + + assert transport == "http" + + +def test_resolve_stream_transport_keeps_explicit_websocket_override_for_image_generation(monkeypatch) -> None: + monkeypatch.setattr( + proxy_module, + "get_model_registry", + lambda: SimpleNamespace(prefers_websockets=lambda _model: False), + ) + + transport = proxy_module._resolve_stream_transport( + transport="auto", + transport_override="websocket", + model="gpt-5.4", + headers={}, + has_image_generation_tool=True, + ) + + assert transport == "websocket" + + def test_response_create_client_metadata_preserves_existing_json_values_and_turn_metadata(): payload = { "client_metadata": { @@ -2322,6 +2358,56 @@ class Settings: ] +@pytest.mark.asyncio +async def test_stream_responses_auto_transport_prefers_http_for_image_generation_tool(monkeypatch): + class Settings: + upstream_base_url = "https://chatgpt.com/backend-api" + upstream_stream_transport = "auto" + upstream_connect_timeout_seconds = 8.0 + stream_idle_timeout_seconds = 45.0 + 
max_sse_event_bytes = 1024 + image_inline_fetch_enabled = False + log_upstream_request_payload = False + proxy_request_budget_seconds = 75.0 + log_upstream_request_summary = False + + monkeypatch.setattr(proxy_module, "get_settings", lambda: Settings()) + monkeypatch.setattr( + proxy_module, + "get_model_registry", + lambda: SimpleNamespace(prefers_websockets=lambda model: model == "gpt-5.4"), + ) + monkeypatch.setattr(proxy_module, "_maybe_log_upstream_request_start", lambda **kwargs: None) + monkeypatch.setattr(proxy_module, "_maybe_log_upstream_request_complete", lambda **kwargs: None) + + session = _SseSession( + _SsePostResponse([b'data: {"type":"response.completed","response":{"id":"resp_http_image_tool"}}\n\n']) + ) + payload = ResponsesRequest.model_validate( + { + "model": "gpt-5.4", + "instructions": "draw", + "input": [{"role": "user", "content": "draw"}], + "tools": [{"type": "image_generation"}], + } + ) + + events = [ + event + async for event in proxy_module.stream_responses( + payload, + headers={"originator": "codex_chatgpt_desktop"}, + access_token="token", + account_id="acc_1", + session=cast(proxy_module.aiohttp.ClientSession, session), + ) + ] + + assert session.calls + assert not getattr(session, "ws_calls", []) + assert events == ['data: {"type":"response.completed","response":{"id":"resp_http_image_tool"}}\n\n'] + + @pytest.mark.asyncio async def test_stream_responses_http_transport_keeps_http(monkeypatch): class Settings: diff --git a/tests/unit/test_select_with_stickiness.py b/tests/unit/test_select_with_stickiness.py index ea5830144..e31eb0784 100644 --- a/tests/unit/test_select_with_stickiness.py +++ b/tests/unit/test_select_with_stickiness.py @@ -261,7 +261,7 @@ async def test_pool_exhausted_but_better_candidate_exists_reallocates(): @pytest.mark.asyncio -async def test_round_robin_pool_health_check_uses_round_robin_probe(): +async def test_round_robin_pool_health_check_prefers_budget_safe_candidate(): now = time.time() acc_a = 
AccountState("a", AccountStatus.ACTIVE, used_percent=96.0, last_selected_at=now - 10) acc_b = AccountState("b", AccountStatus.ACTIVE, used_percent=50.0, last_selected_at=now - 1) @@ -277,13 +277,13 @@ async def test_round_robin_pool_health_check_uses_round_robin_probe(): ) assert result.account is not None - assert result.account.account_id == "a" - repo.delete.assert_not_called() - repo.upsert.assert_called_once_with("key-round-robin", "a", kind=StickySessionKind.PROMPT_CACHE) + assert result.account.account_id == "b" + repo.delete.assert_called_once_with("key-round-robin", kind=StickySessionKind.PROMPT_CACHE) + repo.upsert.assert_called_once_with("key-round-robin", "b", kind=StickySessionKind.PROMPT_CACHE) @pytest.mark.asyncio -async def test_capacity_weighted_pool_health_check_uses_capacity_probe(): +async def test_capacity_weighted_pool_health_check_prefers_budget_safe_candidate(): acc_a = AccountState( "a", AccountStatus.ACTIVE, @@ -319,9 +319,9 @@ async def test_capacity_weighted_pool_health_check_uses_capacity_probe(): ) assert result.account is not None - assert result.account.account_id == "a" - repo.delete.assert_not_called() - repo.upsert.assert_called_once_with("key-capacity-weighted", "a", kind=StickySessionKind.PROMPT_CACHE) + assert result.account.account_id == "b" + repo.delete.assert_called_once_with("key-capacity-weighted", kind=StickySessionKind.PROMPT_CACHE) + repo.upsert.assert_called_once_with("key-capacity-weighted", "b", kind=StickySessionKind.PROMPT_CACHE) @pytest.mark.asyncio @@ -739,7 +739,7 @@ async def test_budget_threshold_95_no_reallocation_at_85_percent(): @pytest.mark.asyncio -async def test_budget_threshold_does_not_reallocate_codex_session_affinity(): +async def test_budget_threshold_reallocates_codex_session_affinity(): acc_a = _active("a", used_percent=96.0) acc_b = _active("b", used_percent=50.0) repo = _make_sticky_repo(existing_account_id="a") @@ -755,9 +755,9 @@ async def 
test_budget_threshold_does_not_reallocate_codex_session_affinity(): ) assert result.account is not None - assert result.account.account_id == "a" - repo.delete.assert_not_called() - repo.upsert.assert_not_called() + assert result.account.account_id == "b" + repo.delete.assert_called_once_with("codex-session-123", kind=StickySessionKind.CODEX_SESSION) + repo.upsert.assert_called_once_with("codex-session-123", "b", kind=StickySessionKind.CODEX_SESSION) @pytest.mark.asyncio diff --git a/tests/unit/test_settings_multi_replica.py b/tests/unit/test_settings_multi_replica.py index d8e67c3a3..3bdf62b25 100644 --- a/tests/unit/test_settings_multi_replica.py +++ b/tests/unit/test_settings_multi_replica.py @@ -27,6 +27,7 @@ def test_settings_multi_replica_defaults(): assert settings.proxy_response_create_limit == 64 assert settings.proxy_compact_response_create_limit == 16 assert settings.proxy_downstream_websocket_idle_timeout_seconds == 120.0 + assert settings.max_sse_event_bytes == 16 * 1024 * 1024 assert settings.proxy_refresh_failure_cooldown_seconds == 5.0 assert settings.usage_refresh_auth_failure_cooldown_seconds == 300.0 assert settings.otel_enabled is False