diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_vm_madvise.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_vm_madvise.c | 445 |
1 files changed, 445 insertions, 0 deletions
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c new file mode 100644 index 000000000000..09c5783ee523 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_vm_madvise.h" + +#include <linux/nospec.h> +#include <drm/xe_drm.h> + +#include "xe_bo.h" +#include "xe_pat.h" +#include "xe_pt.h" +#include "xe_svm.h" + +struct xe_vmas_in_madvise_range { + u64 addr; + u64 range; + struct xe_vma **vmas; + int num_vmas; + bool has_svm_vmas; + bool has_bo_vmas; + bool has_userptr_vmas; +}; + +static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range) +{ + u64 addr = madvise_range->addr; + u64 range = madvise_range->range; + + struct xe_vma **__vmas; + struct drm_gpuva *gpuva; + int max_vmas = 8; + + lockdep_assert_held(&vm->lock); + + madvise_range->num_vmas = 0; + madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL); + if (!madvise_range->vmas) + return -ENOMEM; + + vm_dbg(&vm->xe->drm, "VMA's in range: start=0x%016llx, end=0x%016llx", addr, addr + range); + + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (xe_vma_bo(vma)) + madvise_range->has_bo_vmas = true; + else if (xe_vma_is_cpu_addr_mirror(vma)) + madvise_range->has_svm_vmas = true; + else if (xe_vma_is_userptr(vma)) + madvise_range->has_userptr_vmas = true; + + if (madvise_range->num_vmas == max_vmas) { + max_vmas <<= 1; + __vmas = krealloc(madvise_range->vmas, + max_vmas * sizeof(*madvise_range->vmas), + GFP_KERNEL); + if (!__vmas) { + kfree(madvise_range->vmas); + return -ENOMEM; + } + madvise_range->vmas = __vmas; + } + + madvise_range->vmas[madvise_range->num_vmas] = vma; + (madvise_range->num_vmas)++; + } + + if (!madvise_range->num_vmas) + kfree(madvise_range->vmas); + + vm_dbg(&vm->xe->drm, "madvise_range-num_vmas = %d\n", madvise_range->num_vmas); + + return 0; +} + +static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC); + + for (i = 0; i < num_vmas; i++) { + /*TODO: Extend attributes to bo based vmas */ + if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd && + vmas[i]->attr.preferred_loc.migration_policy == + op->preferred_mem_loc.migration_policy) || + !xe_vma_is_cpu_addr_mirror(vmas[i])) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd; + /* Till multi-device support is not added migration_policy + * is of no use and can be ignored. + */ + vmas[i]->attr.preferred_loc.migration_policy = + op->preferred_mem_loc.migration_policy; + } + } +} + +static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + struct xe_bo *bo; + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC); + xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU); + + for (i = 0; i < num_vmas; i++) { + if (xe_vma_is_userptr(vmas[i]) && + !(op->atomic.val == DRM_XE_ATOMIC_DEVICE && + xe->info.has_device_atomics_on_smem)) { + vmas[i]->skip_invalidation = true; + continue; + } + + if (vmas[i]->attr.atomic_access == op->atomic.val) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.atomic_access = op->atomic.val; + } + + vmas[i]->attr.atomic_access = op->atomic.val; + + bo = xe_vma_bo(vmas[i]); + if (!bo || bo->attr.atomic_access == op->atomic.val) + continue; + + vmas[i]->skip_invalidation = false; + xe_bo_assert_held(bo); + bo->attr.atomic_access = op->atomic.val; + + /* Invalidate cpu page table, so bo can migrate to smem in next access */ + if (xe_bo_is_vram(bo) && + (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU || + bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL)) + ttm_bo_unmap_virtual(&bo->ttm); + } +} + +static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT); + + for (i = 0; i < num_vmas; i++) { + if (vmas[i]->attr.pat_index == op->pat_index.val) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.pat_index = op->pat_index.val; + } + } +} + +typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op); + +static const madvise_func madvise_funcs[] = { + [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc, + [DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic, + [DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index, +}; + +static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpuva *gpuva; + struct xe_tile *tile; + u8 id, tile_mask = 0; + + lockdep_assert_held_write(&vm->lock); + + /* Wait for pending binds */ + if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT) <= 0) + XE_WARN_ON(1); + + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (vma->skip_invalidation || xe_vma_is_null(vma)) + continue; + + if (xe_vma_is_cpu_addr_mirror(vma)) { + tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm, + xe_vma_start(vma), + xe_vma_end(vma)); + } else { + for_each_tile(tile, vm->xe, id) { + if (xe_pt_zap_ptes(tile, vma)) { + tile_mask |= BIT(id); + + /* + * WRITE_ONCE pairs with READ_ONCE + * in xe_vm_has_valid_gpu_mapping() + */ + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated | BIT(id)); + } + } + } + } + + return tile_mask; +} + +static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end) +{ + u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end); + + if (!tile_mask) + return 0; + + xe_device_wmb(vm->xe); + + return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask); +} + +static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args) +{ + if (XE_IOCTL_DBG(xe, !args)) + return false; + + if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K))) + return false; + + if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K))) + return false; + + if (XE_IOCTL_DBG(xe, args->range < SZ_4K)) + return false; + + switch (args->type) { + case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: + { + s32 fd = (s32)args->preferred_mem_loc.devmem_fd; + + if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)) + return false; + + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy > + DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES)) + return false; + + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->atomic.reserved)) + return false; + break; + } + case DRM_XE_MEM_RANGE_ATTR_ATOMIC: + if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU)) + return false; + + if (XE_IOCTL_DBG(xe, args->atomic.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->atomic.reserved)) + return false; + + break; + case DRM_XE_MEM_RANGE_ATTR_PAT: + { + u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val); + + if (XE_IOCTL_DBG(xe, !coh_mode)) + return false; + + if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) + return false; + + if (XE_IOCTL_DBG(xe, args->pat_index.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->pat_index.reserved)) + return false; + break; + } + default: + if (XE_IOCTL_DBG(xe, 1)) + return false; + } + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return false; + + return true; +} + +static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, + int num_vmas, u32 atomic_val) +{ + struct xe_device *xe = vm->xe; + struct xe_bo *bo; + int i; + + for (i = 0; i < num_vmas; i++) { + bo = xe_vma_bo(vmas[i]); + if (!bo) + continue; + /* + * NOTE: The following atomic checks are platform-specific. For example, + * if a device supports CXL atomics, these may not be necessary or + * may behave differently. + */ + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU && + !(bo->flags & XE_BO_FLAG_SYSTEM))) + return false; + + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE && + !(bo->flags & XE_BO_FLAG_VRAM0) && + !(bo->flags & XE_BO_FLAG_VRAM1) && + !(bo->flags & XE_BO_FLAG_SYSTEM && + xe->info.has_device_atomics_on_smem))) + return false; + + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL && + (!(bo->flags & XE_BO_FLAG_SYSTEM) || + (!(bo->flags & XE_BO_FLAG_VRAM0) && + !(bo->flags & XE_BO_FLAG_VRAM1))))) + return false; + } + return true; +} +/** + * xe_vm_madvise_ioctl - Handle MADVise ioctl for a VM + * @dev: DRM device pointer + * @data: Pointer to ioctl data (drm_xe_madvise*) + * @file: DRM file pointer + * + * Handles the MADVISE ioctl to provide memory advice for vma's within + * input range. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_madvise *args = data; + struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, + .range = args->range, }; + struct xe_vm *vm; + struct drm_exec exec; + int err, attr_type; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -EINVAL; + + if (!madvise_args_are_sane(vm->xe, args)) { + err = -EINVAL; + goto put_vm; + } + + xe_svm_flush(vm); + + err = down_write_killable(&vm->lock); + if (err) + goto put_vm; + + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { + err = -ENOENT; + goto unlock_vm; + } + + err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); + if (err) + goto unlock_vm; + + err = get_vmas(vm, &madvise_range); + if (err || !madvise_range.num_vmas) + goto unlock_vm; + + if (madvise_range.has_bo_vmas) { + if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { + if (!check_bo_args_are_sane(vm, madvise_range.vmas, + madvise_range.num_vmas, + args->atomic.val)) { + err = -EINVAL; + goto unlock_vm; + } + } + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + for (int i = 0; i < madvise_range.num_vmas; i++) { + struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]); + + if (!bo) + continue; + err = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + if (err) + goto err_fini; + } + } + } + + if (madvise_range.has_userptr_vmas) { + err = down_read_interruptible(&vm->userptr.notifier_lock); + if (err) + goto err_fini; + } + + if (madvise_range.has_svm_vmas) { + err = down_read_interruptible(&vm->svm.gpusvm.notifier_lock); + if (err) + goto unlock_userptr; + } + + attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs)); + madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args); + + err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range); + + if (madvise_range.has_svm_vmas) + xe_svm_notifier_unlock(vm); + +unlock_userptr: + if (madvise_range.has_userptr_vmas) + up_read(&vm->userptr.notifier_lock); +err_fini: + if (madvise_range.has_bo_vmas) + drm_exec_fini(&exec); + kfree(madvise_range.vmas); + madvise_range.vmas = NULL; +unlock_vm: + up_write(&vm->lock); +put_vm: + xe_vm_put(vm); + return err; +} |