From 9d4774029753a8d5f38dd6c3085564de007b5f68 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Wed, 10 Jun 2026 17:25:12 +0000 Subject: [PATCH 1/6] checkpoint first version --- .github/workflows/build-wheel.yml | 10 ++++++ .github/workflows/test-wheel-linux.yml | 13 +++++++ .gitignore | 3 ++ ci/tools/env-vars | 1 + cuda_core/tests/test_module.py | 49 ++++++++++++++++++++++++++ 5 files changed, 76 insertions(+) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index e289f3da697..0d6ca87df1d 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -456,6 +456,16 @@ jobs: path: ${{ env.CUDA_CORE_CYTHON_TESTS_DIR }}/test_*${{ env.PY_EXT_SUFFIX }} if-no-files-found: error + - name: Build cuda.core test binaries + run: bash ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/build_test_binaries.sh + + - name: Upload cuda.core test binaries + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries + path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.o + if-no-files-found: error + # Note: This overwrites CUDA_PATH etc - name: Set up mini CTK uses: ./.github/actions/fetch_ctk diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 4d64242ea00..3a6e2eb9726 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -279,6 +279,19 @@ jobs: pwd ls -lahR $CUDA_CORE_CYTHON_TESTS_DIR + - name: Download cuda.core test binaries + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries + path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Display structure of downloaded cuda.core test binaries + run: | + pwd + ls -lahR $CUDA_CORE_TEST_BINARIES_DIR + - name: Set up Python ${{ matrix.PY_VER }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: diff --git a/.gitignore b/.gitignore index c55480b7551..d2a5bf6e52e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,9 @@ cache_driver cache_runtime cache_nvrtc +# cuda.core test object fixtures built locally / downloaded as CI artifacts +cuda_core/tests/test_binaries/*.o + # CUDA Python specific (auto-generated) cuda_bindings/cuda/bindings/_bindings/cyruntime.pxd cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx diff --git a/ci/tools/env-vars b/ci/tools/env-vars index 30fac1cdce8..8ffbfa13472 100755 --- a/ci/tools/env-vars +++ b/ci/tools/env-vars @@ -34,6 +34,7 @@ CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${HOST_ echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${SHA}" echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "${REPO_DIR}/cuda_core/dist")" echo "CUDA_CORE_CYTHON_TESTS_DIR=$(realpath "${REPO_DIR}/cuda_core/tests/cython")" + echo "CUDA_CORE_TEST_BINARIES_DIR=$(realpath "${REPO_DIR}/cuda_core/tests/test_binaries")" echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" } >> $GITHUB_ENV diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 3a438f825a0..ebeb0775808 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -172,6 +172,35 @@ def get_saxpy_fatbin(init_cuda): return bytes(fatbin), sym_map +@pytest.fixture(scope="module") +def get_saxpy_object(): + """Read the pre-built saxpy.o. + + In CI: produced by build stage into a test wheel file. + In local dev: auto-built on demand if nvcc is available; if you edit + saxpy.cu, remove the stale saxpy.o to force a rebuild. + """ + import shutil + import subprocess + from pathlib import Path + + binaries_dir = Path(__file__).parent / "test_binaries" + obj_path = binaries_dir / "saxpy.o" + + if not obj_path.is_file(): + if shutil.which("nvcc") is None: + pytest.skip( + f"saxpy.o not found at {obj_path} and nvcc is unavailable. " + "In CI this is downloaded from the build stage." + ) + subprocess.run( # noqa: S603 + ["bash", str(binaries_dir / "build_test_binaries.sh")], # noqa: S607 + check=True, + ) + + return obj_path.read_bytes() + + def test_get_kernel(init_cuda): kernel = """extern "C" __global__ void ABC() { }""" @@ -330,6 +359,26 @@ def test_object_code_load_fatbin_from_file(get_saxpy_fatbin, tmp_path, convert_p mod_obj.get_kernel("saxpy") # force loading +def test_object_code_load_object(get_saxpy_object): + obj = get_saxpy_object + assert isinstance(obj, bytes) + mod_obj = ObjectCode.from_object(obj) + assert mod_obj.code == obj + assert mod_obj.code_type == "object" + # object code is only valid as linker input; get_kernel is unsupported + with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'): + mod_obj.get_kernel("saxpy") + + +def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): + obj_file = tmp_path / "test.o" + obj_file.write_bytes(get_saxpy_object) + arg = str(obj_file) + mod_obj = ObjectCode.from_object(arg) + assert mod_obj.code == arg + assert mod_obj.code_type == "object" + + def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel_cubin From 0bca2be37dd34bb079e6fe335f87015bd8483362 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Wed, 10 Jun 2026 18:34:58 +0000 Subject: [PATCH 2/6] add test_binaries folder --- .../tests/test_binaries/build_test_binaries.sh | 14 ++++++++++++++ cuda_core/tests/test_binaries/saxpy.cu | 12 ++++++++++++ 2 files changed, 26 insertions(+) create mode 100755 cuda_core/tests/test_binaries/build_test_binaries.sh create mode 100644 cuda_core/tests/test_binaries/saxpy.cu diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh new file mode 100755 index 00000000000..6e4de10b261 --- /dev/null +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +# Build .o test fixtures. Invoked at CI build stage + +SCRIPTPATH=$(dirname "$(realpath "$0")") + +nvcc -dc -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" + +ls -lah "${SCRIPTPATH}/saxpy.o" diff --git a/cuda_core/tests/test_binaries/saxpy.cu b/cuda_core/tests/test_binaries/saxpy.cu new file mode 100644 index 00000000000..6ac3fa714d1 --- /dev/null +++ b/cuda_core/tests/test_binaries/saxpy.cu @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#include + +template +__global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) { + const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (size_t i = tid; i < N; i += gridDim.x * blockDim.x) { + out[tid] = a * x[tid] + y[tid]; + } +} From 9785cf664de70222d476088850e65a6829aecb74 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Thu, 11 Jun 2026 16:30:04 +0000 Subject: [PATCH 3/6] add download test binaries in test-wheel-windows.yaml --- .github/workflows/test-wheel-windows.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 6ccacfff8f1..6db50b89da7 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -259,6 +259,19 @@ jobs: Get-Location Get-ChildItem -Recurse -Force $env:CUDA_CORE_CYTHON_TESTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName + - name: Download cuda.core test binaries + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries + path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Display structure of downloaded cuda.core test binaries + run: | + Get-Location + Get-ChildItem -Recurse -Force $env:CUDA_CORE_TEST_BINARIES_DIR | Select-Object Mode, LastWriteTime, Length, FullName + - name: Set up Python ${{ matrix.PY_VER }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: From 4fd635aeff6ac22f99c82889501b9933fde7cf88 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Fri, 12 Jun 2026 18:06:48 +0000 Subject: [PATCH 4/6] use device cuda/std/cstddef --- cuda_core/tests/test_binaries/saxpy.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_binaries/saxpy.cu b/cuda_core/tests/test_binaries/saxpy.cu index 6ac3fa714d1..9e380186635 100644 --- a/cuda_core/tests/test_binaries/saxpy.cu +++ b/cuda_core/tests/test_binaries/saxpy.cu @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -#include +#include template __global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) { From 6e5c0bda29ac3ea218fcb8869346303186501c68 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Fri, 12 Jun 2026 18:16:27 +0000 Subject: [PATCH 5/6] replace shutil with find_nvidia_binary_utility(nvcc) --- cuda_core/tests/test_module.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index ebeb0775808..f2a663238ff 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -180,15 +180,16 @@ def get_saxpy_object(): In local dev: auto-built on demand if nvcc is available; if you edit saxpy.cu, remove the stale saxpy.o to force a rebuild. """ - import shutil import subprocess from pathlib import Path + from cuda.pathfinder import find_nvidia_binary_utility + binaries_dir = Path(__file__).parent / "test_binaries" obj_path = binaries_dir / "saxpy.o" if not obj_path.is_file(): - if shutil.which("nvcc") is None: + if find_nvidia_binary_utility("nvcc") is None: pytest.skip( f"saxpy.o not found at {obj_path} and nvcc is unavailable. " "In CI this is downloaded from the build stage." From a36588b446caa60427a56106ff9ca0d688b9d547 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Fri, 12 Jun 2026 22:17:02 +0000 Subject: [PATCH 6/6] add testing with linker --- cuda_core/tests/test_binaries/saxpy.cu | 4 +++ cuda_core/tests/test_module.py | 45 ++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_binaries/saxpy.cu b/cuda_core/tests/test_binaries/saxpy.cu index 9e380186635..f0afc58637c 100644 --- a/cuda_core/tests/test_binaries/saxpy.cu +++ b/cuda_core/tests/test_binaries/saxpy.cu @@ -3,6 +3,10 @@ #include +__device__ float saxpy_step(float a, float x, float y) { + return a * x + y; +} + template __global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) { const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index f2a663238ff..f82777a95f4 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -8,7 +8,7 @@ import pytest import cuda.core -from cuda.core import Device, Kernel, ObjectCode, Program, ProgramOptions +from cuda.core import Device, Kernel, Linker, LinkerOptions, ObjectCode, Program, ProgramOptions from cuda.core._program import _can_load_generated_ptx from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return from cuda.core._utils.version import binding_version, driver_version @@ -366,7 +366,6 @@ def test_object_code_load_object(get_saxpy_object): mod_obj = ObjectCode.from_object(obj) assert mod_obj.code == obj assert mod_obj.code_type == "object" - # object code is only valid as linker input; get_kernel is unsupported with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'): mod_obj.get_kernel("saxpy") @@ -380,6 +379,48 @@ def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): assert mod_obj.code_type == "object" +def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): + arch = "sm_" + "".join(f"{i}" for i in init_cuda.compute_capability) + kernel_ptx = Program( + r""" + extern __device__ float saxpy_step(float a, float x, float y); + extern "C" __global__ void linked_kernel(float a, float x, float y, float* out) { + if (threadIdx.x == 0 && blockIdx.x == 0) *out = saxpy_step(a, x, y); + } + """, + "c++", + ProgramOptions(relocatable_device_code=True, arch=arch), + ).compile("ptx") + linked = Linker( + kernel_ptx, + ObjectCode.from_object(get_saxpy_object), + options=LinkerOptions(arch=arch), + ).link("cubin") + kernel = linked.get_kernel("linked_kernel") + + import numpy as np + + stream = init_cuda.create_stream() + host_buf = cuda.core.LegacyPinnedMemoryResource().allocate(4) + result = np.from_dlpack(host_buf).view(np.float32) + result[:] = 0.0 + dev_buf = init_cuda.memory_resource.allocate(4, stream=init_cuda.default_stream) + + cuda.core.launch( + stream, + cuda.core.LaunchConfig(grid=1, block=1), + kernel, + np.float32(2.0), + np.float32(3.0), + np.float32(4.0), + dev_buf, + ) + dev_buf.copy_to(host_buf, stream=stream) + stream.sync() + + assert result[0] == 10.0 + + def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel_cubin