diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index 27ee752b9d8..286869235ca 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -469,7 +469,10 @@ jobs: uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries - path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.o + path: | + ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.o + ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.a + ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.lib if-no-files-found: error - name: Download cuda.bindings build artifacts from the prior branch diff --git a/.gitignore b/.gitignore index d2a5bf6e52e..a66da355940 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,8 @@ cache_nvrtc # cuda.core test object fixtures built locally / downloaded as CI artifacts cuda_core/tests/test_binaries/*.o +cuda_core/tests/test_binaries/*.a +cuda_core/tests/test_binaries/*.lib # CUDA Python specific (auto-generated) cuda_bindings/cuda/bindings/_bindings/cyruntime.pxd diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh index 8d2231bd90a..dfdea440f00 100755 --- a/cuda_core/tests/test_binaries/build_test_binaries.sh +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -17,4 +17,10 @@ fi nvcc -dc "${NVCC_EXTRA_FLAGS[@]}" -arch=all-major \ -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" -ls -lah "${SCRIPTPATH}/saxpy.o" +if [[ "${OS:-}" == "Windows_NT" ]]; then + nvcc -lib -o "${SCRIPTPATH}/saxpy.lib" "${SCRIPTPATH}/saxpy.o" + ls -lah "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.lib" +else + nvcc -lib -o "${SCRIPTPATH}/saxpy.a" "${SCRIPTPATH}/saxpy.o" + ls -lah "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.a" +fi diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 524dd471345..530b2dbc67c 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ 
-203,6 +203,33 @@ def get_saxpy_object():
     return obj_path.read_bytes()
 
 
+@pytest.fixture(scope="module")
+def get_saxpy_library():
+    """Read the pre-built saxpy.a (Linux) or saxpy.lib (Windows).
+
+    In CI: produced by build stage alongside saxpy.o.
+    In local dev: auto-built on demand if nvcc is available; if you edit
+    saxpy.cu, remove stale saxpy.o / saxpy.a (saxpy.lib on Windows) to force a rebuild.
+    """
+    binaries_dir = Path(__file__).parent / "test_binaries"
+    lib_name = "saxpy.lib" if os.name == "nt" else "saxpy.a"
+    lib_path = binaries_dir / lib_name
+
+    if not lib_path.is_file():
+        if find_nvidia_binary_utility("nvcc") is None:
+            pytest.skip(
+                f"{lib_name} not found at {lib_path} and nvcc is unavailable. "
+                "In CI this is downloaded from the build stage."
+            )
+        subprocess.run(  # noqa: S603
+            ["bash", str(binaries_dir / "build_test_binaries.sh")],  # noqa: S607
+            check=True,
+            env=os.environ,
+        )
+
+    return lib_path.read_bytes()
+
+
 def test_get_kernel(init_cuda):
     kernel = """extern "C" __global__ void ABC() { }"""
 
@@ -420,6 +447,71 @@ def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda):
     assert result[0] == 10.0
 
 
+def test_object_code_load_library(get_saxpy_library):
+    """from_library keeps the raw library bytes and refuses get_kernel on them."""
+    lib = get_saxpy_library
+    assert isinstance(lib, bytes)
+    mod_obj = ObjectCode.from_library(lib)
+    assert mod_obj.code == lib
+    assert mod_obj.code_type == "library"
+    with pytest.raises(RuntimeError, match=r'Unsupported code type "library"'):
+        mod_obj.get_kernel("saxpy")
+
+
+def test_object_code_load_library_from_file(get_saxpy_library, tmp_path):
+    """from_library stores a file-path argument unchanged as the code."""
+    lib_ext = ".lib" if os.name == "nt" else ".a"
+    lib_file = tmp_path / f"test{lib_ext}"
+    lib_file.write_bytes(get_saxpy_library)
+    arg = str(lib_file)
+    mod_obj = ObjectCode.from_library(arg)
+    assert mod_obj.code == arg
+    assert mod_obj.code_type == "library"
+
+
+def test_object_code_load_library_with_linker(get_saxpy_library, init_cuda):
+    """Link the saxpy library into a relocatable kernel and check the device result."""
+    arch = f"sm_{init_cuda.arch}"
+    kernel_code = Program(
+        r"""
+        extern __device__ float saxpy_step(float a, float x, float y);
+        extern "C" __global__ void linked_kernel(float a, float x, float y, float* out) {
+            if (threadIdx.x == 0 && blockIdx.x == 0) *out = saxpy_step(a, x, y);
+        }
+        """,
+        "c++",
+        ProgramOptions(relocatable_device_code=True, arch=arch),
+    ).compile("cubin")
+    linked = Linker(
+        kernel_code,
+        ObjectCode.from_library(get_saxpy_library),
+        options=LinkerOptions(arch=arch),
+    ).link("cubin")
+    kernel = linked.get_kernel("linked_kernel")
+
+    stream = init_cuda.create_stream()
+    host_buf = cuda.core.LegacyPinnedMemoryResource().allocate(4)
+    result = np.from_dlpack(host_buf).view(np.float32)
+    result[:] = 0.0
+    # Allocate on the same stream that launches the kernel and performs the copy,
+    # so the buffer's first use is ordered after its allocation even if the
+    # memory resource is stream-ordered.
+    dev_buf = init_cuda.memory_resource.allocate(4, stream=stream)
+
+    cuda.core.launch(
+        stream,
+        cuda.core.LaunchConfig(grid=1, block=1),
+        kernel,
+        np.float32(2.0),
+        np.float32(3.0),
+        np.float32(4.0),
+        dev_buf,
+    )
+    dev_buf.copy_to(host_buf, stream=stream)
+    stream.sync()
+
+    assert result[0] == 10.0
+
+
 def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check):
     krn, _ = get_saxpy_kernel_cubin
 