# Workflow run: "Fix int32 overflow deadlock and non-power-of-2 crash in Triton AlltoAllv (#2133)" — run #8617
# Workflow file for this run:
name: Build and test torchcomms

# Run on pushes to main and on every pull request.
on:
  push:
    branches:
      - main
  pull_request:

# Cancel superseded runs of the same workflow: group by PR number when
# triggered from a pull request, otherwise by commit SHA (push to main).
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
  cancel-in-progress: true
jobs:
  # Build the torchcomms pip wheel for each torch/CUDA combination and
  # upload it as a run artifact (consumed by run_py_tests below).
  build_pip_wheel:
    strategy:
      fail-fast: false
      matrix:
        include:
          - runs-on: "linux.g5.12xlarge.nvidia.gpu"
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.8"
            torch-version: "stable"
          - runs-on: "linux.g5.12xlarge.nvidia.gpu"
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.8"
            torch-version: "nightly"
          - runs-on: "linux.g5.12xlarge.nvidia.gpu"
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.8"
            torch-version: "nightly"
            build-flags: "USE_NCCLX=0 USE_TRANSPORT=0"
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      timeout: 60
      runner: ${{ matrix.runs-on }}
      gpu-arch-type: ${{ matrix.gpu-arch-type }}
      gpu-arch-version: ${{ matrix.gpu-arch-version }}
      # Artifact name encodes the full matrix cell so downstream jobs can
      # download the matching wheel.
      upload-artifact: build-artifacts-${{ matrix.torch-version }}-${{ matrix.gpu-arch-type }}-${{ matrix.gpu-arch-version }}-${{ matrix.build-flags }}
      script: |
        set -ex
        source .github/scripts/setup_env.sh --with-cmake --cuda-version "${{ matrix.gpu-arch-version }}" "${{ matrix.torch-version }}"
        export ${{ matrix.build-flags }}
        # Build wheel
        pip install build
        python -m build --wheel --no-isolation
        mkdir -p "${RUNNER_ARTIFACT_DIR}"
        cp dist/*.whl "${RUNNER_ARTIFACT_DIR}/"
        # Record the exact torch version used for the build so the test job
        # can install the same one.
        python -c "import torch; print(torch.__version__)" > "${RUNNER_ARTIFACT_DIR}/torch_version.txt"
build_and_run_cpp_tests:
strategy:
fail-fast: false
matrix:
include:
- runs-on: "linux.g5.12xlarge.nvidia.gpu"
gpu-arch-type: "cuda"
gpu-arch-version: "12.8"
torch-version: "stable"
- runs-on: "linux.g5.12xlarge.nvidia.gpu"
gpu-arch-type: "cuda"
gpu-arch-version: "12.8"
torch-version: "nightly"
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
with:
timeout: 60
runner: ${{ matrix.runs-on }}
gpu-arch-type: ${{ matrix.gpu-arch-type }}
gpu-arch-version: ${{ matrix.gpu-arch-version }}
script: |
set -ex
source .github/scripts/setup_env.sh --with-cmake --cuda-version "${{ matrix.gpu-arch-version }}" "${{ matrix.torch-version }}"
# Build and run C++ tests
cmake -B build_tests -G Ninja -DBUILD_TESTS=ON
cmake --build build_tests
cd build_tests && ctest --output-on-failure
run_py_tests:
needs: build_pip_wheel
strategy:
fail-fast: false
matrix:
include:
- runs-on: "linux.g5.12xlarge.nvidia.gpu"
gpu-arch-type: "cuda"
gpu-arch-version: "12.8"
torch-version: "stable"
- runs-on: "linux.g5.12xlarge.nvidia.gpu"
gpu-arch-type: "cuda"
gpu-arch-version: "12.8"
torch-version: "nightly"
- runs-on: "linux.g5.12xlarge.nvidia.gpu"
gpu-arch-type: "cuda"
gpu-arch-version: "12.8"
torch-version: "nightly"
build-flags: "USE_NCCLX=0 USE_TRANSPORT=0"
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
with:
timeout: 120
runner: ${{ matrix.runs-on }}
gpu-arch-type: ${{ matrix.gpu-arch-type }}
gpu-arch-version: ${{ matrix.gpu-arch-version }}
download-artifact: build-artifacts-${{ matrix.torch-version }}-${{ matrix.gpu-arch-type }}-${{ matrix.gpu-arch-version }}-${{ matrix.build-flags }}
script: |
set -ex
TORCH_VERSION=$(cat "${RUNNER_ARTIFACT_DIR}/torch_version.txt")
source .github/scripts/setup_env.sh --cuda-version "${{ matrix.gpu-arch-version }}" --torch-version "$TORCH_VERSION" "${{ matrix.torch-version }}"
export ${{ matrix.build-flags }}
# Install from pre-built wheel (skip build step)
pip install "${RUNNER_ARTIFACT_DIR}"/*.whl pytest numpy psutil parameterized pydot requests urllib3 tabulate
python -c "import torchcomms"
# Run Python tests
comms/torchcomms/scripts/run_tests_unit_py.sh
comms/torchcomms/scripts/run_tests_integration_py.sh