-
Notifications
You must be signed in to change notification settings - Fork 6.8k
feat: add Responses API support for Azure OpenAI provider #5203
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
f69171c
feat: add Responses API support for Azure OpenAI provider
devin-ai-integration[bot] 1a7d2ad
fix: resolve mypy no-any-return error in last_response_id property
devin-ai-integration[bot] 2315422
fix: forward frequency_penalty and presence_penalty to Responses API …
devin-ai-integration[bot] File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,7 +3,7 @@ | |
| import json | ||
| import logging | ||
| import os | ||
| from typing import Any, TypedDict | ||
| from typing import Any, Literal, TypedDict | ||
| from urllib.parse import urlparse | ||
|
|
||
| from pydantic import BaseModel, PrivateAttr, model_validator | ||
|
|
@@ -72,6 +72,19 @@ class AzureCompletion(BaseLLM): | |
|
|
||
| This class provides direct integration with the Azure AI Inference Python SDK, | ||
| offering native function calling, streaming support, and proper Azure authentication. | ||
|
|
||
| Supports both Chat Completions API (default) and Responses API. | ||
| When ``api="responses"`` is set, calls are delegated to the OpenAI Responses API | ||
| implementation with the Azure resource's ``/openai/v1/`` base URL, reusing the | ||
| fully-tested OpenAI Responses API code path. | ||
|
|
||
| Example:: | ||
|
|
||
| # Chat Completions (default) | ||
| llm = LLM(model="azure/gpt-4o", api_key=KEY, endpoint=ENDPOINT) | ||
|
|
||
| # Responses API | ||
| llm = LLM(model="azure/gpt-4o", api="responses", api_key=KEY, endpoint=ENDPOINT) | ||
| """ | ||
|
|
||
| endpoint: str | None = None | ||
|
|
@@ -82,14 +95,27 @@ class AzureCompletion(BaseLLM): | |
| frequency_penalty: float | None = None | ||
| presence_penalty: float | None = None | ||
| max_tokens: int | None = None | ||
| max_completion_tokens: int | None = None | ||
| stream: bool = False | ||
| interceptor: BaseInterceptor[Any, Any] | None = None | ||
| response_format: type[BaseModel] | None = None | ||
| is_openai_model: bool = False | ||
| is_azure_openai_endpoint: bool = False | ||
| api: Literal["completions", "responses"] = "completions" | ||
| instructions: str | None = None | ||
| store: bool | None = None | ||
| previous_response_id: str | None = None | ||
| include: list[str] | None = None | ||
| builtin_tools: list[str] | None = None | ||
| parse_tool_outputs: bool = False | ||
| auto_chain: bool = False | ||
| auto_chain_reasoning: bool = False | ||
| reasoning_effort: str | None = None | ||
| seed: int | None = None | ||
|
|
||
| _client: Any = PrivateAttr(default=None) | ||
| _async_client: Any = PrivateAttr(default=None) | ||
| _responses_delegate: Any = PrivateAttr(default=None) | ||
|
|
||
| @model_validator(mode="before") | ||
| @classmethod | ||
|
|
@@ -142,17 +168,95 @@ def _normalize_azure_fields(cls, data: Any) -> Any: | |
| def _init_clients(self) -> AzureCompletion: | ||
| if not self.api_key: | ||
| raise ValueError("Azure API key is required.") | ||
| client_kwargs: dict[str, Any] = { | ||
| "endpoint": self.endpoint, | ||
| "credential": AzureKeyCredential(self.api_key), | ||
| } | ||
| if self.api_version: | ||
| client_kwargs["api_version"] = self.api_version | ||
|
|
||
| self._client = ChatCompletionsClient(**client_kwargs) | ||
| self._async_client = AsyncChatCompletionsClient(**client_kwargs) | ||
| if self.api == "responses": | ||
| self._init_responses_delegate() | ||
| else: | ||
| client_kwargs: dict[str, Any] = { | ||
| "endpoint": self.endpoint, | ||
| "credential": AzureKeyCredential(self.api_key), | ||
| } | ||
| if self.api_version: | ||
| client_kwargs["api_version"] = self.api_version | ||
|
|
||
| self._client = ChatCompletionsClient(**client_kwargs) | ||
| self._async_client = AsyncChatCompletionsClient(**client_kwargs) | ||
| return self | ||
|
|
||
| def _init_responses_delegate(self) -> None: | ||
| """Initialise the OpenAICompletion delegate for Responses API calls. | ||
|
|
||
| Constructs the Azure-compatible ``/openai/v1/`` base URL from the | ||
| configured endpoint and creates an :class:`OpenAICompletion` instance | ||
| that handles all Responses API logic. | ||
| """ | ||
| from crewai.llms.providers.openai.completion import OpenAICompletion | ||
|
|
||
| # Build the Azure base_url: <resource>/openai/v1/ | ||
| raw_endpoint = self.endpoint or "" | ||
| # Strip the /openai/deployments/<deployment> suffix if present | ||
| deployment_idx = raw_endpoint.find("/openai/deployments/") | ||
| if deployment_idx != -1: | ||
| resource_url = raw_endpoint[:deployment_idx] | ||
| else: | ||
| resource_url = raw_endpoint.rstrip("/") | ||
|
|
||
| api_version = self.api_version or "2024-06-01" | ||
| base_url = f"{resource_url}/openai/v1/?api-version={api_version}" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. URL format is wrong. |
||
|
|
||
| delegate_kwargs: dict[str, Any] = { | ||
| "model": self.model, | ||
| "provider": "openai", | ||
| "api_key": self.api_key, | ||
| "base_url": base_url, | ||
| "api": "responses", | ||
| "stream": self.stream, | ||
| } | ||
|
|
||
| # Forward Responses API parameters | ||
| if self.instructions is not None: | ||
| delegate_kwargs["instructions"] = self.instructions | ||
| if self.store is not None: | ||
| delegate_kwargs["store"] = self.store | ||
| if self.previous_response_id is not None: | ||
| delegate_kwargs["previous_response_id"] = self.previous_response_id | ||
| if self.include is not None: | ||
| delegate_kwargs["include"] = self.include | ||
| if self.builtin_tools is not None: | ||
| delegate_kwargs["builtin_tools"] = self.builtin_tools | ||
| if self.parse_tool_outputs: | ||
| delegate_kwargs["parse_tool_outputs"] = self.parse_tool_outputs | ||
| if self.auto_chain: | ||
| delegate_kwargs["auto_chain"] = self.auto_chain | ||
| if self.auto_chain_reasoning: | ||
| delegate_kwargs["auto_chain_reasoning"] = self.auto_chain_reasoning | ||
| if self.reasoning_effort is not None: | ||
| delegate_kwargs["reasoning_effort"] = self.reasoning_effort | ||
| if self.temperature is not None: | ||
| delegate_kwargs["temperature"] = self.temperature | ||
| if self.top_p is not None: | ||
| delegate_kwargs["top_p"] = self.top_p | ||
| if self.max_tokens is not None: | ||
| delegate_kwargs["max_tokens"] = self.max_tokens | ||
| if self.max_completion_tokens is not None: | ||
| delegate_kwargs["max_completion_tokens"] = self.max_completion_tokens | ||
| if self.seed is not None: | ||
| delegate_kwargs["seed"] = self.seed | ||
| if self.timeout is not None: | ||
| delegate_kwargs["timeout"] = self.timeout | ||
| if self.max_retries != 2: | ||
| delegate_kwargs["max_retries"] = self.max_retries | ||
| if self.response_format is not None: | ||
| delegate_kwargs["response_format"] = self.response_format | ||
| if self.stop: | ||
| delegate_kwargs["stop"] = self.stop | ||
|
cursor[bot] marked this conversation as resolved.
|
||
| if self.frequency_penalty is not None: | ||
| delegate_kwargs["frequency_penalty"] = self.frequency_penalty | ||
| if self.presence_penalty is not None: | ||
| delegate_kwargs["presence_penalty"] = self.presence_penalty | ||
|
|
||
| self._responses_delegate = OpenAICompletion(**delegate_kwargs) | ||
|
|
||
| def to_config_dict(self) -> dict[str, Any]: | ||
| """Extend base config with Azure-specific fields.""" | ||
| config = super().to_config_dict() | ||
|
|
@@ -172,6 +276,10 @@ def to_config_dict(self) -> dict[str, Any]: | |
| config["presence_penalty"] = self.presence_penalty | ||
| if self.max_tokens is not None: | ||
| config["max_tokens"] = self.max_tokens | ||
| if self.api != "completions": | ||
| config["api"] = self.api | ||
| if self.reasoning_effort is not None: | ||
| config["reasoning_effort"] = self.reasoning_effort | ||
| return config | ||
|
|
||
| @staticmethod | ||
|
|
@@ -277,7 +385,7 @@ def call( | |
| from_agent: Any | None = None, | ||
| response_model: type[BaseModel] | None = None, | ||
| ) -> str | Any: | ||
| """Call Azure AI Inference chat completions API. | ||
| """Call Azure AI Inference API (Chat Completions or Responses). | ||
|
|
||
| Args: | ||
| messages: Input messages for the chat completion | ||
|
|
@@ -291,6 +399,17 @@ def call( | |
| Returns: | ||
| Chat completion response or tool call result | ||
| """ | ||
| if self.api == "responses" and self._responses_delegate is not None: | ||
| return self._responses_delegate.call( | ||
| messages=messages, | ||
| tools=tools, | ||
| callbacks=callbacks, | ||
| available_functions=available_functions, | ||
| from_task=from_task, | ||
| from_agent=from_agent, | ||
| response_model=response_model, | ||
| ) | ||
|
|
||
| with llm_call_context(): | ||
| try: | ||
| # Emit call started event | ||
|
|
@@ -349,7 +468,7 @@ async def acall( # type: ignore[return] | |
| from_agent: Any | None = None, | ||
| response_model: type[BaseModel] | None = None, | ||
| ) -> str | Any: | ||
| """Call Azure AI Inference chat completions API asynchronously. | ||
| """Call Azure AI Inference API asynchronously (Chat Completions or Responses). | ||
|
|
||
| Args: | ||
| messages: Input messages for the chat completion | ||
|
|
@@ -363,6 +482,17 @@ async def acall( # type: ignore[return] | |
| Returns: | ||
| Chat completion response or tool call result | ||
| """ | ||
| if self.api == "responses" and self._responses_delegate is not None: | ||
| return await self._responses_delegate.acall( | ||
| messages=messages, | ||
| tools=tools, | ||
| callbacks=callbacks, | ||
| available_functions=available_functions, | ||
| from_task=from_task, | ||
| from_agent=from_agent, | ||
| response_model=response_model, | ||
| ) | ||
|
|
||
| with llm_call_context(): | ||
| try: | ||
| self._emit_call_started_event( | ||
|
|
@@ -1090,6 +1220,19 @@ def _extract_azure_token_usage(response: ChatCompletions) -> dict[str, Any]: | |
| } | ||
| return {"total_tokens": 0} | ||
|
|
||
| @property | ||
| def last_response_id(self) -> str | None: | ||
| """Get the last response ID from auto-chaining (Responses API only).""" | ||
| if self._responses_delegate is not None: | ||
| rid: str | None = self._responses_delegate.last_response_id | ||
| return rid | ||
| return None | ||
|
|
||
| def reset_chain(self) -> None: | ||
| """Reset the auto-chain state (Responses API only).""" | ||
| if self._responses_delegate is not None: | ||
| self._responses_delegate.reset_chain() | ||
|
|
||
| async def aclose(self) -> None: | ||
| """Close the async client and clean up resources. | ||
|
|
||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Query parameter in base_url produces malformed request URLs
High Severity
The `base_url` is constructed with a query parameter (`?api-version=...`) embedded in it. The OpenAI Python SDK uses httpx's `raw_path` (which includes the query string) for URL joining, so appending API paths like `responses` produces malformed URLs such as `/openai/v1/?api-version=2024-06-01/responses`. Microsoft's official documentation shows the correct format is `https://RESOURCE.openai.azure.com/openai/v1/` without any query parameter — the v1 API endpoint doesn't require `api-version`. All tests pass because they mock `delegate.call()` and never exercise actual HTTP URL construction. Additional Locations (1)
lib/crewai/src/crewai/llms/providers/azure/completion.py#L203-L205