diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index 52bf055315..bd2a982e02 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -3,7 +3,7 @@ import json import logging import os -from typing import Any, TypedDict +from typing import Any, Literal, TypedDict from urllib.parse import urlparse from pydantic import BaseModel, PrivateAttr, model_validator @@ -72,6 +72,19 @@ class AzureCompletion(BaseLLM): This class provides direct integration with the Azure AI Inference Python SDK, offering native function calling, streaming support, and proper Azure authentication. + + Supports both Chat Completions API (default) and Responses API. + When ``api="responses"`` is set, calls are delegated to the OpenAI Responses API + implementation with the Azure resource's ``/openai/v1/`` base URL, reusing the + fully-tested OpenAI Responses API code path. 
+ + Example:: + + # Chat Completions (default) + llm = LLM(model="azure/gpt-4o", api_key=KEY, endpoint=ENDPOINT) + + # Responses API + llm = LLM(model="azure/gpt-4o", api="responses", api_key=KEY, endpoint=ENDPOINT) """ endpoint: str | None = None @@ -82,14 +95,27 @@ class AzureCompletion(BaseLLM): frequency_penalty: float | None = None presence_penalty: float | None = None max_tokens: int | None = None + max_completion_tokens: int | None = None stream: bool = False interceptor: BaseInterceptor[Any, Any] | None = None response_format: type[BaseModel] | None = None is_openai_model: bool = False is_azure_openai_endpoint: bool = False + api: Literal["completions", "responses"] = "completions" + instructions: str | None = None + store: bool | None = None + previous_response_id: str | None = None + include: list[str] | None = None + builtin_tools: list[str] | None = None + parse_tool_outputs: bool = False + auto_chain: bool = False + auto_chain_reasoning: bool = False + reasoning_effort: str | None = None + seed: int | None = None _client: Any = PrivateAttr(default=None) _async_client: Any = PrivateAttr(default=None) + _responses_delegate: Any = PrivateAttr(default=None) @model_validator(mode="before") @classmethod @@ -142,17 +168,95 @@ def _normalize_azure_fields(cls, data: Any) -> Any: def _init_clients(self) -> AzureCompletion: if not self.api_key: raise ValueError("Azure API key is required.") - client_kwargs: dict[str, Any] = { - "endpoint": self.endpoint, - "credential": AzureKeyCredential(self.api_key), - } - if self.api_version: - client_kwargs["api_version"] = self.api_version - self._client = ChatCompletionsClient(**client_kwargs) - self._async_client = AsyncChatCompletionsClient(**client_kwargs) + if self.api == "responses": + self._init_responses_delegate() + else: + client_kwargs: dict[str, Any] = { + "endpoint": self.endpoint, + "credential": AzureKeyCredential(self.api_key), + } + if self.api_version: + client_kwargs["api_version"] = self.api_version + + 
self._client = ChatCompletionsClient(**client_kwargs) + self._async_client = AsyncChatCompletionsClient(**client_kwargs) return self + def _init_responses_delegate(self) -> None: + """Initialise the OpenAICompletion delegate for Responses API calls. + + Constructs the Azure-compatible ``/openai/v1/`` base URL from the + configured endpoint and creates an :class:`OpenAICompletion` instance + that handles all Responses API logic. + """ + from crewai.llms.providers.openai.completion import OpenAICompletion + + # Build the Azure base_url: /openai/v1/ + raw_endpoint = self.endpoint or "" + # Strip the /openai/deployments/ suffix if present + deployment_idx = raw_endpoint.find("/openai/deployments/") + if deployment_idx != -1: + resource_url = raw_endpoint[:deployment_idx] + else: + resource_url = raw_endpoint.rstrip("/") + + api_version = self.api_version or "2024-06-01" + base_url = f"{resource_url}/openai/v1/?api-version={api_version}" + + delegate_kwargs: dict[str, Any] = { + "model": self.model, + "provider": "openai", + "api_key": self.api_key, + "base_url": base_url, + "api": "responses", + "stream": self.stream, + } + + # Forward Responses API parameters + if self.instructions is not None: + delegate_kwargs["instructions"] = self.instructions + if self.store is not None: + delegate_kwargs["store"] = self.store + if self.previous_response_id is not None: + delegate_kwargs["previous_response_id"] = self.previous_response_id + if self.include is not None: + delegate_kwargs["include"] = self.include + if self.builtin_tools is not None: + delegate_kwargs["builtin_tools"] = self.builtin_tools + if self.parse_tool_outputs: + delegate_kwargs["parse_tool_outputs"] = self.parse_tool_outputs + if self.auto_chain: + delegate_kwargs["auto_chain"] = self.auto_chain + if self.auto_chain_reasoning: + delegate_kwargs["auto_chain_reasoning"] = self.auto_chain_reasoning + if self.reasoning_effort is not None: + delegate_kwargs["reasoning_effort"] = self.reasoning_effort + if 
self.temperature is not None: + delegate_kwargs["temperature"] = self.temperature + if self.top_p is not None: + delegate_kwargs["top_p"] = self.top_p + if self.max_tokens is not None: + delegate_kwargs["max_tokens"] = self.max_tokens + if self.max_completion_tokens is not None: + delegate_kwargs["max_completion_tokens"] = self.max_completion_tokens + if self.seed is not None: + delegate_kwargs["seed"] = self.seed + if self.timeout is not None: + delegate_kwargs["timeout"] = self.timeout + if self.max_retries != 2: + delegate_kwargs["max_retries"] = self.max_retries + if self.response_format is not None: + delegate_kwargs["response_format"] = self.response_format + if self.stop: + delegate_kwargs["stop"] = self.stop + if self.frequency_penalty is not None: + delegate_kwargs["frequency_penalty"] = self.frequency_penalty + if self.presence_penalty is not None: + delegate_kwargs["presence_penalty"] = self.presence_penalty + + self._responses_delegate = OpenAICompletion(**delegate_kwargs) + def to_config_dict(self) -> dict[str, Any]: """Extend base config with Azure-specific fields.""" config = super().to_config_dict() @@ -172,6 +276,10 @@ def to_config_dict(self) -> dict[str, Any]: config["presence_penalty"] = self.presence_penalty if self.max_tokens is not None: config["max_tokens"] = self.max_tokens + if self.api != "completions": + config["api"] = self.api + if self.reasoning_effort is not None: + config["reasoning_effort"] = self.reasoning_effort return config @staticmethod @@ -277,7 +385,7 @@ def call( from_agent: Any | None = None, response_model: type[BaseModel] | None = None, ) -> str | Any: - """Call Azure AI Inference chat completions API. + """Call Azure AI Inference API (Chat Completions or Responses). 
@property
def last_response_id(self) -> str | None:
    """Return the delegate's last response ID (Responses API only).

    Returns:
        The ``last_response_id`` reported by the Responses API delegate, or
        ``None`` when no delegate has been created.
    """
    delegate = self._responses_delegate
    if delegate is None:
        return None
    response_id: str | None = delegate.last_response_id
    return response_id

def reset_chain(self) -> None:
    """Reset the delegate's auto-chain state (Responses API only).

    Does nothing when no Responses API delegate has been created.
    """
    delegate = self._responses_delegate
    if delegate is None:
        return
    delegate.reset_chain()
# =============================================================================
# Azure Responses API Tests
# =============================================================================

_AZURE_ENDPOINT = "https://my-resource.openai.azure.com"


def _azure_llm(**overrides):
    """Build an AzureCompletion with the shared test defaults plus overrides."""
    params = {"model": "gpt-4o", "api_key": "test-key", "endpoint": _AZURE_ENDPOINT}
    params.update(overrides)
    return AzureCompletion(**params)


def _responses_llm(**overrides):
    """Build an AzureCompletion configured for the Responses API."""
    return _azure_llm(api="responses", **overrides)


def test_azure_responses_api_initialization():
    """AzureCompletion accepts api='responses' and keeps the given settings."""
    llm = _responses_llm(instructions="You are a helpful assistant.", store=True)

    assert llm.api == "responses"
    assert llm.instructions == "You are a helpful assistant."
    assert llm.store is True
    assert llm.model == "gpt-4o"
    assert llm._responses_delegate is not None


def test_azure_responses_api_default_is_completions():
    """Without an explicit api, the completions path stays the default."""
    llm = _azure_llm()

    assert llm.api == "completions"
    assert llm._responses_delegate is None


def test_azure_responses_api_delegate_is_openai_completion():
    """The Responses API delegate is an OpenAICompletion in responses mode."""
    from crewai.llms.providers.openai.completion import OpenAICompletion

    delegate = _responses_llm()._responses_delegate

    assert isinstance(delegate, OpenAICompletion)
    assert delegate.api == "responses"
    assert delegate.model == "gpt-4o"


def test_azure_responses_api_base_url_construction():
    """The delegate base URL is built from the resource endpoint and api version."""
    from crewai.llms.providers.openai.completion import OpenAICompletion

    delegate = _responses_llm(api_version="2025-03-01-preview")._responses_delegate

    assert isinstance(delegate, OpenAICompletion)
    assert delegate.base_url == "https://my-resource.openai.azure.com/openai/v1/?api-version=2025-03-01-preview"


def test_azure_responses_api_base_url_strips_deployment_suffix():
    """A deployment-style endpoint is reduced to the resource root."""
    llm = _responses_llm(
        endpoint="https://my-resource.openai.azure.com/openai/deployments/gpt-4o",
    )

    base_url = llm._responses_delegate.base_url
    assert "my-resource.openai.azure.com/openai/v1/" in base_url
    assert "/openai/deployments/" not in base_url


def test_azure_responses_api_base_url_with_trailing_slash():
    """A trailing slash on the endpoint does not end up doubled in the URL."""
    llm = _responses_llm(
        endpoint="https://my-resource.openai.azure.com/",
        api_version="2025-03-01-preview",
    )

    base_url = llm._responses_delegate.base_url
    assert base_url == "https://my-resource.openai.azure.com/openai/v1/?api-version=2025-03-01-preview"


def test_azure_responses_api_forwards_parameters():
    """Responses API and sampling parameters reach the delegate unchanged."""
    forwarded_values = {
        "instructions": "Be helpful",
        "previous_response_id": "resp_abc123",
        "include": ["reasoning.encrypted_content"],
        "builtin_tools": ["web_search_preview"],
        "temperature": 0.5,
        "top_p": 0.9,
        "max_tokens": 1000,
        "reasoning_effort": "high",
        "seed": 42,
        "frequency_penalty": 0.3,
        "presence_penalty": 0.6,
    }
    forwarded_flags = ("store", "parse_tool_outputs", "auto_chain", "auto_chain_reasoning")

    llm = _responses_llm(**forwarded_values, **dict.fromkeys(forwarded_flags, True))
    delegate = llm._responses_delegate

    for name, expected in forwarded_values.items():
        assert getattr(delegate, name) == expected
    for name in forwarded_flags:
        assert getattr(delegate, name) is True


def test_azure_responses_api_call_delegates_to_openai():
    """call() hands every argument to the delegate and returns its result."""
    llm = _responses_llm()

    with patch.object(llm._responses_delegate, "call", return_value="responses result") as delegate_call:
        outcome = llm.call("Hello, world!")

    delegate_call.assert_called_once_with(
        messages="Hello, world!",
        tools=None,
        callbacks=None,
        available_functions=None,
        from_task=None,
        from_agent=None,
        response_model=None,
    )
    assert outcome == "responses result"


@pytest.mark.asyncio
async def test_azure_responses_api_acall_delegates_to_openai():
    """acall() hands every argument to the delegate and returns its result."""
    llm = _responses_llm()

    with patch.object(
        llm._responses_delegate,
        "acall",
        new_callable=AsyncMock,
        return_value="async responses result",
    ) as delegate_acall:
        outcome = await llm.acall("Hello async!")

    delegate_acall.assert_called_once_with(
        messages="Hello async!",
        tools=None,
        callbacks=None,
        available_functions=None,
        from_task=None,
        from_agent=None,
        response_model=None,
    )
    assert outcome == "async responses result"


def test_azure_responses_api_call_with_tools():
    """Tools and available functions are passed through to the delegate."""
    llm = _responses_llm()
    tools = [
        {
            "name": "get_weather",
            "description": "Get weather",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        }
    ]
    available_functions = {"get_weather": lambda location: f"Sunny in {location}"}

    with patch.object(llm._responses_delegate, "call", return_value="Weather result") as delegate_call:
        outcome = llm.call(
            messages=[{"role": "user", "content": "What's the weather?"}],
            tools=tools,
            available_functions=available_functions,
        )

    delegate_call.assert_called_once()
    passed = delegate_call.call_args.kwargs
    assert passed["tools"] == tools
    assert passed["available_functions"] == available_functions
    assert outcome == "Weather result"


def test_azure_responses_api_completions_not_affected():
    """The completions path still creates the native Azure clients."""
    llm = _azure_llm()

    assert llm.api == "completions"
    assert llm._responses_delegate is None
    assert llm._client is not None
    assert llm._async_client is not None


def test_azure_responses_api_via_llm_factory():
    """The LLM factory routes api='responses' to AzureCompletion."""
    llm = LLM(
        model="azure/gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
    )

    assert isinstance(llm, AzureCompletion)
    assert llm.api == "responses"
    assert llm._responses_delegate is not None


def test_azure_responses_api_to_config_dict():
    """to_config_dict() reports a non-default api and the reasoning effort."""
    config = _responses_llm(reasoning_effort="high").to_config_dict()

    assert config["api"] == "responses"
    assert config["reasoning_effort"] == "high"


def test_azure_completions_api_to_config_dict_no_api_field():
    """to_config_dict() omits api when it is the default 'completions'."""
    config = _azure_llm().to_config_dict()

    assert "api" not in config


def test_azure_responses_api_last_response_id():
    """last_response_id mirrors the delegate's value."""
    llm = _responses_llm(auto_chain=True)

    # Nothing has been chained yet.
    assert llm.last_response_id is None

    # Simulate a completed call by setting the delegate's stored ID.
    llm._responses_delegate._last_response_id = "resp_xyz789"
    assert llm.last_response_id == "resp_xyz789"


def test_azure_responses_api_reset_chain():
    """reset_chain() clears the delegate's stored response ID."""
    llm = _responses_llm(auto_chain=True)

    llm._responses_delegate._last_response_id = "resp_xyz789"
    assert llm.last_response_id == "resp_xyz789"

    llm.reset_chain()
    assert llm.last_response_id is None


def test_azure_responses_api_last_response_id_without_delegate():
    """last_response_id is None in completions mode (no delegate)."""
    assert _azure_llm().last_response_id is None


def test_azure_responses_api_reset_chain_without_delegate():
    """reset_chain() is a no-op in completions mode (no delegate)."""
    # Must simply not raise.
    _azure_llm().reset_chain()


def test_azure_responses_api_with_structured_output():
    """response_model is forwarded and the delegate's parsed result is returned."""
    from pydantic import BaseModel, Field

    class MathAnswer(BaseModel):
        result: int = Field(description="The numerical result")
        explanation: str = Field(description="Brief explanation")

    llm = _responses_llm()
    canned = MathAnswer(result=42, explanation="The answer to everything")

    with patch.object(llm._responses_delegate, "call", return_value=canned) as delegate_call:
        outcome = llm.call("What is the answer?", response_model=MathAnswer)

    delegate_call.assert_called_once()
    recorded = delegate_call.call_args
    assert recorded is not None
    assert recorded.kwargs["response_model"] == MathAnswer
    assert isinstance(outcome, MathAnswer)
    assert outcome.result == 42


def test_azure_responses_api_streaming_forwarded():
    """stream=True reaches the delegate."""
    assert _responses_llm(stream=True)._responses_delegate.stream is True


def test_azure_responses_api_max_completion_tokens_forwarded():
    """max_completion_tokens reaches the delegate."""
    llm = _responses_llm(max_completion_tokens=500)

    assert llm._responses_delegate.max_completion_tokens == 500


def test_azure_responses_api_default_api_version_in_url():
    """Without AZURE_API_VERSION set, the code default lands in the base URL."""
    # patch.dict snapshots os.environ and restores it on exit, so the variable
    # can simply be dropped for the duration of the construction.
    with patch.dict(os.environ):
        os.environ.pop("AZURE_API_VERSION", None)
        llm = _responses_llm()

    base_url = llm._responses_delegate.base_url
    assert "api-version=" in base_url
    assert "api-version=2024-06-01" in base_url


def test_azure_responses_api_custom_api_version_in_url():
    """An explicit api_version is used in the base URL."""
    llm = _responses_llm(api_version="2025-03-01-preview")

    assert "api-version=2025-03-01-preview" in llm._responses_delegate.base_url


def test_azure_responses_api_no_chat_clients_created():
    """Responses mode skips creating the native Chat Completions clients."""
    llm = _responses_llm()

    assert llm._client is None
    assert llm._async_client is None
    assert llm._responses_delegate is not None


def test_azure_responses_api_stop_words_forwarded():
    """Stop words reach the delegate."""
    llm = _responses_llm(stop=["STOP"])

    assert llm._responses_delegate.stop == ["STOP"]


def test_azure_responses_api_response_format_forwarded():
    """response_format reaches the delegate."""
    from pydantic import BaseModel

    class MyFormat(BaseModel):
        answer: str

    llm = _responses_llm(response_format=MyFormat)

    assert llm._responses_delegate.response_format == MyFormat