Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion cuda_core/tests/memory/test_managed_ops.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import mmap

import pytest
from helpers.buffers import DummyDeviceMemoryResource, DummyUnifiedMemoryResource

Expand All @@ -9,7 +11,14 @@
from cuda.core import Device, Host, ManagedBuffer
from cuda.core._memory._managed_buffer import _get_int_attr

_MANAGED_TEST_ALLOCATION_SIZE = 4096
# Managed-memory prefetch and CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION
# operate at physical-page granularity. Test buffers must each occupy a full
# page; otherwise the pool packs sub-page allocations into one page and
# per-buffer prefetch locations become indistinguishable. ``mmap.PAGESIZE``
# tracks the OS page size (4 KiB on most x86, 64 KiB on nvidia-64k aarch64
# kernels), so allocations stay one-page-per-buffer on every platform.
_PAGE_SIZE = mmap.PAGESIZE
# Size, in bytes, requested for each managed test buffer: exactly one page.
_MANAGED_TEST_ALLOCATION_SIZE = _PAGE_SIZE
# NOTE(review): presumably the integer the read-mostly range attribute reports
# once the advice has been applied -- confirm against the tests that use it.
_READ_MOSTLY_ENABLED = 1
# NOTE(review): presumably the location id reported for host (CPU) residency,
# mirroring the driver's CU_DEVICE_CPU (-1) -- confirm against the driver docs.
_HOST_LOCATION_ID = -1
# NOTE(review): the meaning is not evident from this definition alone; see the
# tests that reference it and document the intent there.
_INVALID_HOST_DEVICE_ORDINAL = 0
Expand All @@ -21,6 +30,12 @@ def _last_prefetch_location(buf):
return _get_int_attr(buf, driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION)


def _page_base(buf):
    """Return the page-aligned base address of the page holding ``buf``'s start.

    ``buf.handle`` is converted to an integer and rounded down to a multiple
    of ``_PAGE_SIZE`` by clearing the in-page offset bits. The masking is only
    a correct round-down when ``_PAGE_SIZE`` is a power of two.

    Two buffers that yield the same value start on the same page, and buffers
    sharing a page cannot be prefetched to different locations independently.
    """
    start_address = int(buf.handle)
    # Bits below the page size encode the offset within the page.
    in_page_offset_mask = _PAGE_SIZE - 1
    return start_address & ~in_page_offset_mask


def _skip_if_raw_managed_alloc_unsupported(device):
# Raw `cuMemAllocManaged` capability — distinct from conftest's
# `skip_if_managed_memory_unsupported`, which gates `ManagedMemoryResource`
Expand Down Expand Up @@ -216,6 +231,10 @@ def test_per_buffer_location(self, location_ops_device, location_ops_mr):

device = location_ops_device
bufs = [location_ops_mr.allocate(_MANAGED_TEST_ALLOCATION_SIZE, stream=device.default_stream) for _ in range(2)]
# Per-buffer prefetch locations are only observable when the buffers sit
# on distinct physical pages; assert that here so a pool-packing change
# fails loudly instead of silently migrating one shared page.
assert _page_base(bufs[0]) != _page_base(bufs[1])
stream = device.create_stream()

prefetch_batch(stream, bufs, [Host(), device])
Expand Down
Loading