Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,16 @@ jobs:
'name = "rapids-singlecell"',
f'name = "rapids-singlecell-cu{cuda}"',
)
# Rename matching extra to "rapids", remove the other
# Rename matching extra to "rapids", remove the other CUDA extra
text = text.replace(f'rapids-cu{cuda} =', 'rapids =')
# Remove the other CUDA extra line entirely
lines = text.splitlines(keepends=True)
text = "".join(l for l in lines if f'rapids-cu{other}' not in l)
# Remove the other CUDA extra (handles multi-line TOML arrays)
import re
text = re.sub(
rf'^rapids-cu{other}\s*=\s*\[.*?\]\s*\n',
'',
text,
flags=re.MULTILINE | re.DOTALL,
)

# Set CUDA architectures (replace "native" with CI target archs)
text = text.replace(
Expand Down Expand Up @@ -112,14 +117,23 @@ jobs:
CIBW_ENVIRONMENT_PASS_LINUX: SETUPTOOLS_SCM_PRETEND_VERSION
CIBW_ENVIRONMENT: >
CUDA_PATH=/usr/local/cuda
LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
PATH=/usr/local/cuda/bin:$PATH
CIBW_BEFORE_BUILD: >
python -m pip install -U pip
scikit-build-core cmake ninja nanobind
librmm-cu${{ matrix.cuda_major }} &&
SITE=$(python -c "import sysconfig;print(sysconfig.get_path('purelib'))") &&
echo "[rsc-build] site-packages=$SITE" &&
echo "[rsc-build] librmm=$(ls $SITE/librmm/lib64/*.so 2>/dev/null)" &&
echo "[rsc-build] rapids_logger=$(ls $SITE/rapids_logger/lib64/*.so 2>/dev/null)" &&
ln -sf "$SITE/librmm/lib64/librmm.so" /usr/local/lib/librmm.so &&
ln -sf "$SITE/rapids_logger/lib64/librapids_logger.so" /usr/local/lib/librapids_logger.so &&
ldconfig &&
python -c "import librmm;print(librmm.__path__[0])" > /tmp/.librmm_dir &&
echo "[rsc-build] marker=$(cat /tmp/.librmm_dir)"
CIBW_TEST_SKIP: "*"
CIBW_TEST_COMMAND: ""
CIBW_REPAIR_WHEEL_COMMAND: "auditwheel repair --exclude libcublas.so.${{ matrix.cuda_major }} --exclude libcublasLt.so.${{ matrix.cuda_major }} --exclude libcudart.so.${{ matrix.cuda_major }} -w {dest_dir} {wheel}"
CIBW_REPAIR_WHEEL_COMMAND: "auditwheel repair --exclude libcublas.so.${{ matrix.cuda_major }} --exclude libcublasLt.so.${{ matrix.cuda_major }} --exclude libcudart.so.${{ matrix.cuda_major }} --exclude librmm.so --exclude librapids_logger.so -w {dest_dir} {wheel}"
CIBW_BUILD_VERBOSITY: "1"

- uses: actions/upload-artifact@v4
Expand Down
48 changes: 47 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,47 @@ if (RSC_BUILD_EXTENSIONS)
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module ${SKBUILD_SABI_COMPONENT})
find_package(nanobind CONFIG REQUIRED)
find_package(CUDAToolkit REQUIRED)

# Locate the rmm CMake package config and expose it to find_package() by
# extending CMAKE_PREFIX_PATH.
#
# Candidate environment roots are collected in this order:
#   1. $LIBRMM_DIR/..        (only if LIBRMM_DIR is set)
#   2. $CONDA_PREFIX, $VIRTUAL_ENV, $PIXI_PROJECT_ROOT (each only if set)
#   3. sys.prefix of the Python interpreter found above
# Hints derived from the /tmp/.librmm_dir marker file are appended to
# CMAKE_PREFIX_PATH before any root-derived hints, so they come first among
# the entries added here. Entries already present in CMAKE_PREFIX_PATH stay
# ahead of everything appended below.
set(_env_roots "")
# NOTE(review): the *parent* of LIBRMM_DIR is used as a root, and the globs
# further down only match <root>/lib/cmake/rmm or
# <root>/lib/python*/site-packages/... — confirm what LIBRMM_DIR is expected
# to point at so that one of those patterns can actually match.
if(DEFINED ENV{LIBRMM_DIR})
list(APPEND _env_roots "$ENV{LIBRMM_DIR}/..")
endif()
# Environment-manager variables are used verbatim as roots when defined.
foreach(_var CONDA_PREFIX VIRTUAL_ENV PIXI_PROJECT_ROOT)
if(DEFINED ENV{${_var}})
list(APPEND _env_roots "$ENV{${_var}}")
endif()
endforeach()
# Fallback: ask the interpreter for sys.prefix. stderr is discarded
# (ERROR_QUIET); an empty result is skipped by the if() below.
execute_process(
COMMAND "${Python_EXECUTABLE}" -c "import sys; print(sys.prefix)"
OUTPUT_VARIABLE _py_prefix OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET)
if(_py_prefix)
list(APPEND _env_roots "${_py_prefix}")
endif()
# CI/cibuildwheel: CIBW_BEFORE_BUILD writes the librmm package path to a
# marker file, which is read here if it exists.
# NOTE(review): the marker lives at a fixed path under /tmp and is honoured
# whenever it exists, including on local builds where it may be stale or
# written by another user — confirm this is acceptable outside CI.
if(EXISTS "/tmp/.librmm_dir")
file(READ "/tmp/.librmm_dir" _rmm_marker)
string(STRIP "${_rmm_marker}" _rmm_marker)
# Marker contains e.g. /opt/.../site-packages/librmm. Glob the cmake
# directories of librmm itself and of the sibling rapids_logger package.
file(GLOB _marker_hints "${_rmm_marker}/lib*/cmake"
"${_rmm_marker}/../rapids_logger/lib*/cmake")
list(APPEND CMAKE_PREFIX_PATH ${_marker_hints})
endif()
# For every root, glob the rmm / rapids_logger package directories and add
# their parent directory to CMAKE_PREFIX_PATH.
foreach(_root ${_env_roots})
file(GLOB _hints "${_root}/lib/cmake/rmm"
"${_root}/lib/python*/site-packages/librmm/lib*/cmake/rmm"
"${_root}/lib/python*/site-packages/rapids_logger/lib*/cmake/rapids_logger")
foreach(_h ${_hints})
# DIRECTORY strips the last path component, e.g. .../lib/cmake/rmm
# becomes .../lib/cmake.
get_filename_component(_dir "${_h}" DIRECTORY)
list(APPEND CMAKE_PREFIX_PATH "${_dir}")
endforeach()
endforeach()
# Log what was searched so a failed lookup can be diagnosed from the
# configure output.
message(STATUS "rmm search roots: ${_env_roots}")
message(STATUS "rmm CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}")
# REQUIRED: configuration stops with an error if no rmm config is found.
find_package(rmm CONFIG REQUIRED)
message(STATUS "Building for CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
else()
message(STATUS "RSC_BUILD_EXTENSIONS=OFF -> skipping compiled extensions for docs")
Expand Down Expand Up @@ -84,7 +125,8 @@ if (RSC_BUILD_EXTENSIONS)
add_nb_cuda_module(_edistance_cuda src/rapids_singlecell/_cuda/edistance/edistance.cu)
add_nb_cuda_module(_hvg_cuda src/rapids_singlecell/_cuda/hvg/hvg.cu)
add_nb_cuda_module(_kde_cuda src/rapids_singlecell/_cuda/kde/kde.cu)
add_nb_cuda_module(_wilcoxon_cuda src/rapids_singlecell/_cuda/wilcoxon/wilcoxon.cu)
add_nb_cuda_module(_wilcoxon_ovr_cuda src/rapids_singlecell/_cuda/wilcoxon/wilcoxon_ovr.cu)
add_nb_cuda_module(_wilcoxon_ovo_cuda src/rapids_singlecell/_cuda/wilcoxon/wilcoxon_ovo.cu)
# Harmony CUDA modules
add_nb_cuda_module(_harmony_scatter_cuda src/rapids_singlecell/_cuda/harmony/scatter/scatter.cu)
add_nb_cuda_module(_harmony_outer_cuda src/rapids_singlecell/_cuda/harmony/outer/outer.cu)
Expand All @@ -100,4 +142,8 @@ if (RSC_BUILD_EXTENSIONS)
target_link_libraries(_harmony_correction_batched_cuda PRIVATE CUDA::cublas)
# Wilcoxon binned histogram CUDA module
add_nb_cuda_module(_wilcoxon_binned_cuda src/rapids_singlecell/_cuda/wilcoxon_binned/wilcoxon_binned.cu)
# Link the rmm::rmm target privately into the two Wilcoxon modules
# (_wilcoxon_ovr_cuda and _wilcoxon_ovo_cuda). Linking is skipped when
# rmm_FOUND is not set.
# NOTE(review): if rmm is always located through a find_package(... REQUIRED)
# call before this point, the guard is always true — confirm whether it is
# still needed.
if(rmm_FOUND)
target_link_libraries(_wilcoxon_ovr_cuda PRIVATE rmm::rmm)
target_link_libraries(_wilcoxon_ovo_cuda PRIVATE rmm::rmm)
endif()
endif()
18 changes: 16 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,22 @@ dependencies = [
]

[project.optional-dependencies]
rapids-cu13 = [ "cupy-cuda13x", "cudf-cu13>=25.10", "cuml-cu13>=25.10", "cugraph-cu13>=25.10", "cuvs-cu13>=25.10" ]
rapids-cu12 = [ "cupy-cuda12x", "cudf-cu12>=25.10", "cuml-cu12>=25.10", "cugraph-cu12>=25.10", "cuvs-cu12>=25.10" ]
rapids-cu13 = [
"cupy-cuda13x",
"librmm-cu13>=25.10",
"cudf-cu13>=25.10",
"cuml-cu13>=25.10",
"cugraph-cu13>=25.10",
"cuvs-cu13>=25.10",
]
rapids-cu12 = [
"cupy-cuda12x",
"librmm-cu12>=25.10",
"cudf-cu12>=25.10",
"cuml-cu12>=25.10",
"cugraph-cu12>=25.10",
"cuvs-cu12>=25.10",
]

doc = [
"sphinx>=4.5.0",
Expand Down
15 changes: 14 additions & 1 deletion src/rapids_singlecell/_cuda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@

import importlib

# Pre-load librmm.so and its dependencies so the dynamic linker can resolve
# them when our nanobind extensions (which link rmm) are imported. This is
# the same pattern used by cuml, cuvs, and other RAPIDS packages.
try:
    import librmm

    librmm.load_library()
except (ImportError, OSError):
    # Deliberate best-effort: when the librmm package is missing or its
    # shared library cannot be loaded, importing this package must still
    # succeed. Any extension that needs rmm reports its own error when it
    # is imported.
    pass

# Names of the compiled extension modules that link against rmm.
# NOTE(review): how this set is consumed is not visible in this section —
# confirm against the rest of the module.
_RMM_MODULES = {"_wilcoxon_ovo_cuda", "_wilcoxon_ovr_cuda"}

__all__ = [
"_aggr_cuda",
"_aucell_cuda",
Expand Down Expand Up @@ -44,7 +56,8 @@
"_sparse2dense_cuda",
"_spca_cuda",
"_wilcoxon_binned_cuda",
"_wilcoxon_cuda",
"_wilcoxon_ovo_cuda",
"_wilcoxon_ovr_cuda",
]


Expand Down
7 changes: 7 additions & 0 deletions src/rapids_singlecell/_cuda/nb_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ using gpu_array = nb::ndarray<T, Device>;
template <typename T, typename Device, typename Contig>
using gpu_array_contig = nb::ndarray<T, Device, Contig>;

// Host (NumPy) array aliases.
//
// host_array<T>: nb::ndarray of element type T, tagged nb::numpy and
// constrained to exactly one dimension through nb::ndim<1>.
template <typename T>
using host_array = nb::ndarray<T, nb::numpy, nb::ndim<1>>;

// host_array_2d<T>: nb::ndarray of element type T, tagged nb::numpy. The
// alias itself carries no nb::ndim constraint.
// NOTE(review): the name suggests rank 2, but nothing here enforces it —
// confirm whether nb::ndim<2> was intended or callers validate the shape.
template <typename T>
using host_array_2d = nb::ndarray<T, nb::numpy>;

// Register bindings for both regular CUDA and managed-memory arrays.
// Usage:
// template <typename Device>
Expand Down
Loading
Loading