path: root/drivers/gpu/drm/xe/xe_vm_madvise.c
Diffstat (limited to 'drivers/gpu/drm/xe/xe_vm_madvise.c')
-rw-r--r--	drivers/gpu/drm/xe/xe_vm_madvise.c	445
1 file changed, 445 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
new file mode 100644
index 000000000000..09c5783ee523
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_vm_madvise.h"
+
+#include <linux/nospec.h>
+#include <drm/xe_drm.h>
+
+#include "xe_bo.h"
+#include "xe_pat.h"
+#include "xe_pt.h"
+#include "xe_svm.h"
+
+struct xe_vmas_in_madvise_range {
+ u64 addr;
+ u64 range;
+ struct xe_vma **vmas;
+ int num_vmas;
+ bool has_svm_vmas;
+ bool has_bo_vmas;
+ bool has_userptr_vmas;
+};
+
+static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
+{
+ u64 addr = madvise_range->addr;
+ u64 range = madvise_range->range;
+
+ struct xe_vma **__vmas;
+ struct drm_gpuva *gpuva;
+ int max_vmas = 8;
+
+ lockdep_assert_held(&vm->lock);
+
+ madvise_range->num_vmas = 0;
+ madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL);
+ if (!madvise_range->vmas)
+ return -ENOMEM;
+
+ vm_dbg(&vm->xe->drm, "VMAs in range: start=0x%016llx, end=0x%016llx", addr, addr + range);
+
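+ /* Collect every VMA overlapping [addr, addr + range), doubling the array capacity as needed */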
+ drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
+ struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+ if (xe_vma_bo(vma))
+ madvise_range->has_bo_vmas = true;
+ else if (xe_vma_is_cpu_addr_mirror(vma))
+ madvise_range->has_svm_vmas = true;
+ else if (xe_vma_is_userptr(vma))
+ madvise_range->has_userptr_vmas = true;
+
+ if (madvise_range->num_vmas == max_vmas) {
+ max_vmas <<= 1;
+ __vmas = krealloc(madvise_range->vmas,
+ max_vmas * sizeof(*madvise_range->vmas),
+ GFP_KERNEL);
+ if (!__vmas) {
+ kfree(madvise_range->vmas);
+ return -ENOMEM;
+ }
+ madvise_range->vmas = __vmas;
+ }
+
+ madvise_range->vmas[madvise_range->num_vmas] = vma;
+ madvise_range->num_vmas++;
+ }
+
+ if (!madvise_range->num_vmas)
+ kfree(madvise_range->vmas);
+
+ vm_dbg(&vm->xe->drm, "madvise_range->num_vmas = %d\n", madvise_range->num_vmas);
+
+ return 0;
+}
+
+static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op)
+{
+ int i;
+
+ xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);
+
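+ /* Mark VMAs whose preferred location is unchanged so their PTEs are not zapped later */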
+ for (i = 0; i < num_vmas; i++) {
+ /* TODO: Extend attributes to BO-based VMAs */
+ if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
+ vmas[i]->attr.preferred_loc.migration_policy ==
+ op->preferred_mem_loc.migration_policy) ||
+ !xe_vma_is_cpu_addr_mirror(vmas[i])) {
+ vmas[i]->skip_invalidation = true;
+ } else {
+ vmas[i]->skip_invalidation = false;
+ vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
+ /* Until multi-device support is added, migration_policy
+  * is unused and can be ignored.
+  */
+ vmas[i]->attr.preferred_loc.migration_policy =
+ op->preferred_mem_loc.migration_policy;
+ }
+ }
+}
+
+static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op)
+{
+ struct xe_bo *bo;
+ int i;
+
+ xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
+ xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);
+
+ for (i = 0; i < num_vmas; i++) {
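+ /* Userptr VMAs are always system-memory backed; their mappings only need
+  * invalidation for device atomics on system memory
+  */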
+ if (xe_vma_is_userptr(vmas[i]) &&
+ !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
+ xe->info.has_device_atomics_on_smem)) {
+ vmas[i]->skip_invalidation = true;
+ continue;
+ }
+
+ if (vmas[i]->attr.atomic_access == op->atomic.val) {
+ vmas[i]->skip_invalidation = true;
+ } else {
+ vmas[i]->skip_invalidation = false;
+ vmas[i]->attr.atomic_access = op->atomic.val;
+ }
+
+ bo = xe_vma_bo(vmas[i]);
+ if (!bo || bo->attr.atomic_access == op->atomic.val)
+ continue;
+
+ vmas[i]->skip_invalidation = false;
+ xe_bo_assert_held(bo);
+ bo->attr.atomic_access = op->atomic.val;
+
+ /* Invalidate cpu page table, so bo can migrate to smem in next access */
+ if (xe_bo_is_vram(bo) &&
+ (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
+ bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
+ ttm_bo_unmap_virtual(&bo->ttm);
+ }
+}
+
+static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op)
+{
+ int i;
+
+ xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);
+
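+ /* A PAT index change must be written to the PTEs; skip VMAs already using the requested index */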
+ for (i = 0; i < num_vmas; i++) {
+ if (vmas[i]->attr.pat_index == op->pat_index.val) {
+ vmas[i]->skip_invalidation = true;
+ } else {
+ vmas[i]->skip_invalidation = false;
+ vmas[i]->attr.pat_index = op->pat_index.val;
+ }
+ }
+}
+
+typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op);
+
+static const madvise_func madvise_funcs[] = {
+ [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
+ [DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
+ [DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
+};
+
+static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ struct drm_gpuva *gpuva;
+ struct xe_tile *tile;
+ u8 id, tile_mask = 0;
+
+ lockdep_assert_held_write(&vm->lock);
+
+ /* Wait for pending binds */
+ if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT) <= 0)
+ XE_WARN_ON(1);
+
+ drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
+ struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+ if (vma->skip_invalidation || xe_vma_is_null(vma))
+ continue;
+
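+ /* CPU address mirror VMAs are zapped via their SVM ranges; other VMAs via the per-tile page-table walk */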
+ if (xe_vma_is_cpu_addr_mirror(vma)) {
+ tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
+ xe_vma_start(vma),
+ xe_vma_end(vma));
+ } else {
+ for_each_tile(tile, vm->xe, id) {
+ if (xe_pt_zap_ptes(tile, vma)) {
+ tile_mask |= BIT(id);
+
+ /*
+ * WRITE_ONCE pairs with READ_ONCE
+ * in xe_vm_has_valid_gpu_mapping()
+ */
+ WRITE_ONCE(vma->tile_invalidated,
+ vma->tile_invalidated | BIT(id));
+ }
+ }
+ }
+ }
+
+ return tile_mask;
+}
+
+static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);
+
+ if (!tile_mask)
+ return 0;
+
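+ /* Ensure the PTE zaps are visible to the device before issuing the TLB invalidation */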
+ xe_device_wmb(vm->xe);
+
+ return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
+}
+
+static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
+{
+ if (XE_IOCTL_DBG(xe, !args))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
+ return false;
+
+ switch (args->type) {
+ case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
+ {
+ s32 fd = (s32)args->preferred_mem_loc.devmem_fd;
+
+ if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
+ DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
+ return false;
+ break;
+ }
+ case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
+ if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->atomic.pad))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->atomic.reserved))
+ return false;
+
+ break;
+ case DRM_XE_MEM_RANGE_ATTR_PAT:
+ {
+ u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val);
+
+ if (XE_IOCTL_DBG(xe, !coh_mode))
+ return false;
+
+ if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->pat_index.pad))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
+ return false;
+ break;
+ }
+ default:
+ if (XE_IOCTL_DBG(xe, 1))
+ return false;
+ }
+
+ if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+ return false;
+
+ return true;
+}
+
+static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
+ int num_vmas, u32 atomic_val)
+{
+ struct xe_device *xe = vm->xe;
+ struct xe_bo *bo;
+ int i;
+
+ for (i = 0; i < num_vmas; i++) {
+ bo = xe_vma_bo(vmas[i]);
+ if (!bo)
+ continue;
+ /*
+ * NOTE: The following atomic checks are platform-specific. For example,
+ * if a device supports CXL atomics, these may not be necessary or
+ * may behave differently.
+ */
+ if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
+ !(bo->flags & XE_BO_FLAG_SYSTEM)))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
+ !(bo->flags & XE_BO_FLAG_VRAM0) &&
+ !(bo->flags & XE_BO_FLAG_VRAM1) &&
+ !(bo->flags & XE_BO_FLAG_SYSTEM &&
+ xe->info.has_device_atomics_on_smem)))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
+ (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
+ (!(bo->flags & XE_BO_FLAG_VRAM0) &&
+ !(bo->flags & XE_BO_FLAG_VRAM1)))))
+ return false;
+ }
+ return true;
+}
+
+/**
+ * xe_vm_madvise_ioctl - Handle MADVISE ioctl for a VM
+ * @dev: DRM device pointer
+ * @data: Pointer to ioctl data (drm_xe_madvise*)
+ * @file: DRM file pointer
+ *
+ * Handles the MADVISE ioctl to provide memory advice for VMAs within
+ * the input range.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_madvise *args = data;
+ struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
+ .range = args->range, };
+ struct xe_vm *vm;
+ struct drm_exec exec;
+ int err, attr_type;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_DBG(xe, !vm))
+ return -EINVAL;
+
+ if (!madvise_args_are_sane(vm->xe, args)) {
+ err = -EINVAL;
+ goto put_vm;
+ }
+
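+ /* Flush pending SVM work before taking the VM lock */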
+ xe_svm_flush(vm);
+
+ err = down_write_killable(&vm->lock);
+ if (err)
+ goto put_vm;
+
+ if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+ err = -ENOENT;
+ goto unlock_vm;
+ }
+
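+ /* Split VMAs at the range boundaries so the new attributes apply exactly to [start, start + range) */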
+ err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
+ if (err)
+ goto unlock_vm;
+
+ err = get_vmas(vm, &madvise_range);
+ if (err || !madvise_range.num_vmas)
+ goto unlock_vm;
+
+ if (madvise_range.has_bo_vmas) {
+ if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
+ if (!check_bo_args_are_sane(vm, madvise_range.vmas,
+ madvise_range.num_vmas,
+ args->atomic.val)) {
+ err = -EINVAL;
+ goto unlock_vm;
+ }
+ }
+
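+		/* Take the dma-resv locks of all BOs backing VMAs in the range */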
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&exec) {
+ for (int i = 0; i < madvise_range.num_vmas; i++) {
+ struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);
+
+ if (!bo)
+ continue;
+ err = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ goto err_fini;
+ }
+ }
+ }
+
+ if (madvise_range.has_userptr_vmas) {
+ err = down_read_interruptible(&vm->userptr.notifier_lock);
+ if (err)
+ goto err_fini;
+ }
+
+ if (madvise_range.has_svm_vmas) {
+ err = down_read_interruptible(&vm->svm.gpusvm.notifier_lock);
+ if (err)
+ goto unlock_userptr;
+ }
+
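+ /* args->type was validated in madvise_args_are_sane(); guard the table index against speculation */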
+ attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
+ madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);
+
+ err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
+
+ if (madvise_range.has_svm_vmas)
+ xe_svm_notifier_unlock(vm);
+
+unlock_userptr:
+ if (madvise_range.has_userptr_vmas)
+ up_read(&vm->userptr.notifier_lock);
+err_fini:
+ if (madvise_range.has_bo_vmas)
+ drm_exec_fini(&exec);
+ kfree(madvise_range.vmas);
+ madvise_range.vmas = NULL;
+unlock_vm:
+ up_write(&vm->lock);
+put_vm:
+ xe_vm_put(vm);
+ return err;
+}