From 678a73e49915cd7ed568dbe65d6eaef9e8db24f5 Mon Sep 17 00:00:00 2001 From: Zelong Yue Date: Tue, 28 Apr 2026 14:22:05 +0800 Subject: [PATCH 1/2] Perftest: keep CUDA context creation ABI-stable cuGetProcAddress resolves versioned driver entry points from the requested CUDA version, not from the headers used to compile perftest. Request the CUDA 3.2 cuCtxCreate ABI explicitly and keep the function pointer plus call site on the three-argument cuCtxCreate_v2 signature, even when CUDA 13 headers expose a newer default cuCtxCreate prototype. The stale configure-generated CUDA_VER macro is removed because source now uses CUDA_VERSION from cuda.h for compile-time checks. The existing CUDA_VER_* constants remain local names for explicit driver-entry ABI request versions. Signed-off-by: Zelong Yue --- configure.ac | 1 - src/cuda_loader.c | 9 +++++---- src/cuda_loader.h | 4 ++-- src/cuda_memory.c | 6 ++++-- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index d8c353f8..dc5108f6 100755 --- a/configure.ac +++ b/configure.ac @@ -369,7 +369,6 @@ if test "$cuda_found" = "yes"; then AC_DEFINE_UNQUOTED([CUDA_PATH], "$cuda_h_path" , [Enable CUDA feature]) AC_CHECK_LIB([cuda], [cuMemGetHandleForAddressRange], [HAVE_CUDA_CUMEMGETHANDLEFORADDRESSRANGE=yes], [HAVE_CUDA_CUMEMGETHANDLEFORADDRESSRANGE=no]) cuda_toolkit_version=`grep "define CUDA_VERSION" $cuda_h_path | cut -d' ' -f3` - AC_DEFINE_UNQUOTED([CUDA_VER], [$cuda_toolkit_version], [Define CUDA_VER]) AC_TRY_LINK([ #include <$cuda_h_path>], [int x = CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD|CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED;], diff --git a/src/cuda_loader.c b/src/cuda_loader.c index 32a550ec..c5c85212 100755 --- a/src/cuda_loader.c +++ b/src/cuda_loader.c @@ -10,7 +10,7 @@ CUresult (*p_cuDeviceGetCount)(int *) = NULL; CUresult (*p_cuDeviceGet)(CUdevice *, int) = NULL; CUresult (*p_cuDeviceGetAttribute)(int *, CUdevice_attribute, CUdevice) = NULL; CUresult (*p_cuDeviceGetName)(char *, int, 
CUdevice) = NULL; -CUresult (*p_cuCtxCreate)(CUcontext *, unsigned int, CUdevice) = NULL; +CUresult (*p_cuCtxCreate_v2)(CUcontext *, unsigned int, CUdevice) = NULL; CUresult (*p_cuDevicePrimaryCtxRetain)(CUcontext *, CUdevice) = NULL; CUresult (*p_cuCtxSetCurrent)(CUcontext) = NULL; CUresult (*p_cuCtxDestroy)(CUcontext) = NULL; @@ -25,7 +25,7 @@ CUresult (*p_cuMemcpyDtoD)(CUdeviceptr, CUdeviceptr, size_t) = NULL; CUresult (*p_cuMemGetHandleForAddressRange)(void *, void *, size_t, CUmemRangeHandleType, unsigned int) = NULL; #endif CUresult (*p_cuDriverGetVersion)(int* driverVersion) = NULL; -#if CUDA_VER >= 12000 +#if CUDA_VERSION >= 12000 CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags, CUdriverProcAddressQueryResult* symbolStatus) = NULL; #else CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags) = NULL; @@ -34,7 +34,7 @@ CUresult (*p_cuMemAllocManaged)(CUdeviceptr* dptr, size_t bytesize, unsigned int CUresult (*p_cuCtxSynchronize) (void) = NULL; int load_cuda_function(void **func_ptr, const char *func_name, int version) { - #if CUDA_VER >= 12000 + #if CUDA_VERSION >= 12000 CUresult res = p_cuGetProcAddress(func_name, func_ptr, version, 0, NULL); #else CUresult res = p_cuGetProcAddress(func_name, func_ptr, version, 0); @@ -69,7 +69,8 @@ int load_cuda_library(void) { { (void**)&p_cuDeviceGet, "cuDeviceGet", CUDA_VER_2_0 }, { (void**)&p_cuDeviceGetAttribute, "cuDeviceGetAttribute", CUDA_VER_2_0 }, { (void**)&p_cuDeviceGetName, "cuDeviceGetName", CUDA_VER_2_0 }, - { (void**)&p_cuCtxCreate, "cuCtxCreate", CUDA_VER_3_2 }, + /* CUDA_VER_3_2 selects the cuCtxCreate_v2 ABI across CUDA 11-13. 
*/ + { (void**)&p_cuCtxCreate_v2, "cuCtxCreate", CUDA_VER_3_2 }, { (void**)&p_cuDevicePrimaryCtxRetain, "cuDevicePrimaryCtxRetain", CUDA_VER_7_0 }, { (void**)&p_cuCtxSetCurrent, "cuCtxSetCurrent", CUDA_VER_4_0 }, { (void**)&p_cuCtxDestroy, "cuCtxDestroy", CUDA_VER_4_0 }, diff --git a/src/cuda_loader.h b/src/cuda_loader.h index d0835ac1..e63642c8 100755 --- a/src/cuda_loader.h +++ b/src/cuda_loader.h @@ -30,7 +30,7 @@ extern CUresult (*p_cuDeviceGetCount)(int *); extern CUresult (*p_cuDeviceGet)(CUdevice *, int); extern CUresult (*p_cuDeviceGetAttribute)(int *, CUdevice_attribute, CUdevice); extern CUresult (*p_cuDeviceGetName)(char *, int, CUdevice); -extern CUresult (*p_cuCtxCreate)(CUcontext *, unsigned int, CUdevice); +extern CUresult (*p_cuCtxCreate_v2)(CUcontext *, unsigned int, CUdevice); extern CUresult (*p_cuDevicePrimaryCtxRetain)(CUcontext *, CUdevice); extern CUresult (*p_cuCtxSetCurrent)(CUcontext); extern CUresult (*p_cuCtxDestroy)(CUcontext); @@ -47,7 +47,7 @@ extern CUresult (*p_cuMemGetHandleForAddressRange)(void *, void *, size_t, CUmem extern CUresult (*p_cuDriverGetVersion)(int* driverVersion); extern CUresult (*p_cuCtxSynchronize) (void); extern CUresult (*p_cuMemAllocManaged)(CUdeviceptr* dptr, size_t bytesize, unsigned int flags); -#if CUDA_VER >= 12000 +#if CUDA_VERSION >= 12000 extern CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags, CUdriverProcAddressQueryResult* symbolStatus); #else extern CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags); diff --git a/src/cuda_memory.c b/src/cuda_memory.c index 92948c21..e24bff4f 100644 --- a/src/cuda_memory.c +++ b/src/cuda_memory.c @@ -94,9 +94,11 @@ static int init_gpu(struct cuda_memory_ctx *ctx) printf("[pid = %d, dev = %d] device name = [%s]\n", getpid(), ctx->cuDevice, name); printf("creating CUDA Ctx\n"); - error = p_cuCtxCreate(&ctx->cuContext, CU_CTX_MAP_HOST, ctx->cuDevice); + /* Create context */ + 
error = p_cuCtxCreate_v2(&ctx->cuContext, CU_CTX_MAP_HOST, ctx->cuDevice); + if (error != CUDA_SUCCESS) { - printf("cuCtxCreate() error=%d\n", error); + printf("cuCtxCreate_v2() error=%d\n", error); return FAILURE; } From dafceede4c6598cc3c6b628529a9e4c1b1a97e83 Mon Sep 17 00:00:00 2001 From: Zelong Yue Date: Tue, 28 Apr 2026 14:22:17 +0800 Subject: [PATCH 2/2] Perftest: prevent CUDA 13 build regressions in CI Add an Ubuntu 24.04 GitHub Actions job that installs CUDA 13.1 and builds perftest against the CUDA headers. This keeps the CUDA loader compatibility path covered by CI after the driver API signature changes in CUDA 13. The job installs the toolkit from NVIDIA's ubuntu2404 repository and publishes the CUDA 13.1 paths through GITHUB_PATH and GITHUB_ENV so that later steps see them. Signed-off-by: Zelong Yue --- .github/workflows/ubuntu24_04_cuda13_1.yaml | 55 +++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 .github/workflows/ubuntu24_04_cuda13_1.yaml diff --git a/.github/workflows/ubuntu24_04_cuda13_1.yaml b/.github/workflows/ubuntu24_04_cuda13_1.yaml new file mode 100644 index 00000000..4ca61c68 --- /dev/null +++ b/.github/workflows/ubuntu24_04_cuda13_1.yaml @@ -0,0 +1,55 @@ +name: Build and Test perftest on Ubuntu 24.04 with CUDA 13.1 + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + build: + runs-on: ubuntu-24.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install CUDA repository + run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + build-essential \ + autoconf \ + automake \ + libtool \ + pkg-config \ + libibverbs-dev \ + librdmacm-dev \ + libibumad-dev \ + libpci-dev \ + cuda-toolkit-13-1 \ + cuda-drivers + + - name: Set up CUDA environment + run: | + echo "/usr/local/cuda-13.1/bin" >> "$GITHUB_PATH" + echo "LD_LIBRARY_PATH=/usr/local/cuda-13.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> "$GITHUB_ENV" + + - name: 
Run autogen.sh + run: ./autogen.sh + + - name: Configure the build + run: ./configure CUDA_H_PATH=/usr/local/cuda-13.1/include/cuda.h + + - name: Build perftest + run: make + + - name: Install perftest + run: sudo make install