Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 154 additions & 11 deletions lib/crewai/src/crewai/llms/providers/azure/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import logging
import os
from typing import Any, TypedDict
from typing import Any, Literal, TypedDict
from urllib.parse import urlparse

from pydantic import BaseModel, PrivateAttr, model_validator
Expand Down Expand Up @@ -72,6 +72,19 @@ class AzureCompletion(BaseLLM):

This class provides direct integration with the Azure AI Inference Python SDK,
offering native function calling, streaming support, and proper Azure authentication.

Supports both Chat Completions API (default) and Responses API.
When ``api="responses"`` is set, calls are delegated to the OpenAI Responses API
implementation with the Azure resource's ``/openai/v1/`` base URL, reusing the
fully-tested OpenAI Responses API code path.

Example::

# Chat Completions (default)
llm = LLM(model="azure/gpt-4o", api_key=KEY, endpoint=ENDPOINT)

# Responses API
llm = LLM(model="azure/gpt-4o", api="responses", api_key=KEY, endpoint=ENDPOINT)
"""

endpoint: str | None = None
Expand All @@ -82,14 +95,27 @@ class AzureCompletion(BaseLLM):
frequency_penalty: float | None = None
presence_penalty: float | None = None
max_tokens: int | None = None
max_completion_tokens: int | None = None
stream: bool = False
interceptor: BaseInterceptor[Any, Any] | None = None
response_format: type[BaseModel] | None = None
is_openai_model: bool = False
is_azure_openai_endpoint: bool = False
api: Literal["completions", "responses"] = "completions"
instructions: str | None = None
store: bool | None = None
previous_response_id: str | None = None
include: list[str] | None = None
builtin_tools: list[str] | None = None
parse_tool_outputs: bool = False
auto_chain: bool = False
auto_chain_reasoning: bool = False
reasoning_effort: str | None = None
seed: int | None = None

_client: Any = PrivateAttr(default=None)
_async_client: Any = PrivateAttr(default=None)
_responses_delegate: Any = PrivateAttr(default=None)

@model_validator(mode="before")
@classmethod
Expand Down Expand Up @@ -142,17 +168,95 @@ def _normalize_azure_fields(cls, data: Any) -> Any:
def _init_clients(self) -> AzureCompletion:
    """Initialise the underlying SDK clients after model validation.

    For the Responses API (``api="responses"``) all work is delegated to an
    OpenAICompletion instance, so no Azure AI Inference clients are created.
    Otherwise both a sync and an async ChatCompletionsClient are built from
    the configured endpoint and key.

    Returns:
        The validated AzureCompletion instance (pydantic after-validator).

    Raises:
        ValueError: If no API key was provided.
    """
    if not self.api_key:
        raise ValueError("Azure API key is required.")
    if self.api == "responses":
        # Responses API path: build the OpenAICompletion delegate instead
        # of the Azure AI Inference chat clients.
        self._init_responses_delegate()
    else:
        client_kwargs: dict[str, Any] = {
            "endpoint": self.endpoint,
            "credential": AzureKeyCredential(self.api_key),
        }
        # api_version is optional; when unset the SDK default is used.
        if self.api_version:
            client_kwargs["api_version"] = self.api_version

        self._client = ChatCompletionsClient(**client_kwargs)
        self._async_client = AsyncChatCompletionsClient(**client_kwargs)
    return self

def _init_responses_delegate(self) -> None:
    """Initialise the OpenAICompletion delegate for Responses API calls.

    Constructs the Azure-compatible ``/openai/v1/`` base URL from the
    configured endpoint and creates an :class:`OpenAICompletion` instance
    that handles all Responses API logic. Every Responses-API and shared
    sampling parameter configured on this instance is forwarded to the
    delegate so both code paths behave consistently.
    """
    from crewai.llms.providers.openai.completion import OpenAICompletion

    # Build the Azure base_url: <resource>/openai/v1/
    raw_endpoint = self.endpoint or ""
    # Strip the /openai/deployments/<deployment> suffix if present so we
    # are left with the bare resource URL.
    deployment_idx = raw_endpoint.find("/openai/deployments/")
    if deployment_idx != -1:
        resource_url = raw_endpoint[:deployment_idx]
    else:
        resource_url = raw_endpoint.rstrip("/")

    # NOTE: the Azure OpenAI v1 endpoint takes no api-version query
    # parameter. Embedding one in base_url breaks URL joining in the
    # OpenAI SDK (which appends API paths to the raw path including the
    # query string), yielding malformed URLs such as
    # "/openai/v1/?api-version=2024-06-01/responses".
    base_url = f"{resource_url}/openai/v1/"

    delegate_kwargs: dict[str, Any] = {
        "model": self.model,
        "provider": "openai",
        "api_key": self.api_key,
        "base_url": base_url,
        "api": "responses",
        "stream": self.stream,
    }

    # Forward Responses API parameters; only explicitly-set values are
    # passed through so the delegate keeps its own defaults otherwise.
    if self.instructions is not None:
        delegate_kwargs["instructions"] = self.instructions
    if self.store is not None:
        delegate_kwargs["store"] = self.store
    if self.previous_response_id is not None:
        delegate_kwargs["previous_response_id"] = self.previous_response_id
    if self.include is not None:
        delegate_kwargs["include"] = self.include
    if self.builtin_tools is not None:
        delegate_kwargs["builtin_tools"] = self.builtin_tools
    if self.parse_tool_outputs:
        delegate_kwargs["parse_tool_outputs"] = self.parse_tool_outputs
    if self.auto_chain:
        delegate_kwargs["auto_chain"] = self.auto_chain
    if self.auto_chain_reasoning:
        delegate_kwargs["auto_chain_reasoning"] = self.auto_chain_reasoning
    if self.reasoning_effort is not None:
        delegate_kwargs["reasoning_effort"] = self.reasoning_effort
    if self.temperature is not None:
        delegate_kwargs["temperature"] = self.temperature
    if self.top_p is not None:
        delegate_kwargs["top_p"] = self.top_p
    if self.max_tokens is not None:
        delegate_kwargs["max_tokens"] = self.max_tokens
    if self.max_completion_tokens is not None:
        delegate_kwargs["max_completion_tokens"] = self.max_completion_tokens
    if self.seed is not None:
        delegate_kwargs["seed"] = self.seed
    if self.timeout is not None:
        delegate_kwargs["timeout"] = self.timeout
    # 2 is the delegate's default; only forward an explicit override.
    if self.max_retries != 2:
        delegate_kwargs["max_retries"] = self.max_retries
    if self.response_format is not None:
        delegate_kwargs["response_format"] = self.response_format
    if self.stop:
        delegate_kwargs["stop"] = self.stop
    if self.frequency_penalty is not None:
        delegate_kwargs["frequency_penalty"] = self.frequency_penalty
    if self.presence_penalty is not None:
        delegate_kwargs["presence_penalty"] = self.presence_penalty

    self._responses_delegate = OpenAICompletion(**delegate_kwargs)

def to_config_dict(self) -> dict[str, Any]:
"""Extend base config with Azure-specific fields."""
config = super().to_config_dict()
Expand All @@ -172,6 +276,10 @@ def to_config_dict(self) -> dict[str, Any]:
config["presence_penalty"] = self.presence_penalty
if self.max_tokens is not None:
config["max_tokens"] = self.max_tokens
if self.api != "completions":
config["api"] = self.api
if self.reasoning_effort is not None:
config["reasoning_effort"] = self.reasoning_effort
return config

@staticmethod
Expand Down Expand Up @@ -277,7 +385,7 @@ def call(
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API.
"""Call Azure AI Inference API (Chat Completions or Responses).

Args:
messages: Input messages for the chat completion
Expand All @@ -291,6 +399,17 @@ def call(
Returns:
Chat completion response or tool call result
"""
if self.api == "responses" and self._responses_delegate is not None:
return self._responses_delegate.call(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)

with llm_call_context():
try:
# Emit call started event
Expand Down Expand Up @@ -349,7 +468,7 @@ async def acall( # type: ignore[return]
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API asynchronously.
"""Call Azure AI Inference API asynchronously (Chat Completions or Responses).

Args:
messages: Input messages for the chat completion
Expand All @@ -363,6 +482,17 @@ async def acall( # type: ignore[return]
Returns:
Chat completion response or tool call result
"""
if self.api == "responses" and self._responses_delegate is not None:
return await self._responses_delegate.acall(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)

with llm_call_context():
try:
self._emit_call_started_event(
Expand Down Expand Up @@ -1090,6 +1220,19 @@ def _extract_azure_token_usage(response: ChatCompletions) -> dict[str, Any]:
}
return {"total_tokens": 0}

@property
def last_response_id(self) -> str | None:
    """Get the last response ID from auto-chaining (Responses API only)."""
    delegate = self._responses_delegate
    if delegate is None:
        return None
    response_id: str | None = delegate.last_response_id
    return response_id

def reset_chain(self) -> None:
    """Reset the auto-chain state (Responses API only)."""
    delegate = self._responses_delegate
    if delegate is not None:
        delegate.reset_chain()

async def aclose(self) -> None:
"""Close the async client and clean up resources.

Expand Down
Loading
Loading