summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/drm_gpusvm.c297
-rw-r--r--drivers/gpu/drm/xe/Kconfig2
-rw-r--r--drivers/gpu/drm/xe/Kconfig.debug1
-rw-r--r--drivers/gpu/drm/xe/Makefile5
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h2
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h5
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h15
-rw-r--r--drivers/gpu/drm/xe/display/intel_fbdev_fb.c18
-rw-r--r--drivers/gpu/drm/xe/display/xe_dsb_buffer.c10
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c73
-rw-r--r--drivers/gpu/drm/xe/display/xe_hdcp_gsc.c8
-rw-r--r--drivers/gpu/drm/xe/display/xe_plane_initial.c4
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h1
-rw-r--r--drivers/gpu/drm/xe/regs/xe_lrc_layout.h3
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c36
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf.c16
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c776
-rw-r--r--drivers/gpu/drm/xe/tests/xe_live_test_mod.c2
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c66
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci.c225
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.h6
-rw-r--r--drivers/gpu/drm/xe/tests/xe_wa_test.c91
-rw-r--r--drivers/gpu/drm/xe/xe_bb.c4
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c853
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h78
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h15
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.c468
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.h12
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.c66
-rw-r--r--drivers/gpu/drm/xe/xe_device.c11
-rw-r--r--drivers/gpu/drm/xe/xe_device_sysfs.c106
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h19
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c72
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.c5
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c31
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c27
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.h5
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c8
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c73
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.c28
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.h3
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c28
-rw-r--r--drivers/gpu/drm/xe/xe_gt_printk.h32
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c21
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c24
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.c57
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats_types.h33
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.c48
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.h4
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c46
-rw-r--r--drivers/gpu/drm/xe/xe_guc.h4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c55
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.h1
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.c13
-rw-r--r--drivers/gpu/drm/xe/xe_guc_fwif.h30
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c67
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c24
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.h2
-rw-r--r--drivers/gpu/drm/xe/xe_hmm.c325
-rw-r--r--drivers/gpu/drm/xe/xe_hmm.h18
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c10
-rw-r--r--drivers/gpu/drm/xe/xe_i2c.c2
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.c464
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.h17
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c12
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c90
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c47
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c6
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c40
-rw-r--r--drivers/gpu/drm/xe/xe_pci_types.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c12
-rw-r--r--drivers/gpu/drm/xe/xe_printk.h129
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.c24
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c157
-rw-r--r--drivers/gpu/drm/xe/xe_pt.h3
-rw-r--r--drivers/gpu/drm/xe/xe_pt_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_pxp.c1
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_submit.c34
-rw-r--r--drivers/gpu/drm/xe/xe_query.c5
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.c6
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.h3
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.c14
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.h2
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.c115
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.h6
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.c75
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.h17
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h44
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_types.h12
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.c11
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c366
-rw-r--r--drivers/gpu/drm/xe/xe_svm.h61
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.c135
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.h13
-rw-r--r--drivers/gpu/drm/xe/xe_tile_printk.h127
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval.c3
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c29
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_abi.h130
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.c319
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.h107
-rw-r--r--drivers/gpu/drm/xe/xe_validation.c278
-rw-r--r--drivers/gpu/drm/xe/xe_validation.h192
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c603
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h55
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.c40
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h100
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c63
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules1
-rw-r--r--drivers/misc/mei/Kconfig13
-rw-r--r--drivers/misc/mei/Makefile1
-rw-r--r--drivers/misc/mei/bus.c13
-rw-r--r--drivers/misc/mei/mei_lb.c312
-rw-r--r--include/drm/drm_gpusvm.h64
-rw-r--r--include/drm/intel/i915_component.h1
-rw-r--r--include/drm/intel/intel_lb_mei_interface.h70
-rw-r--r--include/linux/mei_cl_bus.h1
124 files changed, 6661 insertions, 2266 deletions
diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index e2a9a6ae1d54..eeeeb99cfdf6 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -373,6 +373,12 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
*
* This function initializes the GPU SVM.
*
+ * Note: If only using the simple drm_gpusvm_pages API (get/unmap/free),
+ * then only @gpusvm, @name, and @drm are expected. However, the same base
+ * @gpusvm can also be used with both modes together in which case the full
+ * setup is needed, where the core drm_gpusvm_pages API will simply never use
+ * the other fields.
+ *
* Return: 0 on success, a negative error code on failure.
*/
int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
@@ -383,8 +389,16 @@ int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
const struct drm_gpusvm_ops *ops,
const unsigned long *chunk_sizes, int num_chunks)
{
- if (!ops->invalidate || !num_chunks)
- return -EINVAL;
+ if (mm) {
+ if (!ops->invalidate || !num_chunks)
+ return -EINVAL;
+ mmgrab(mm);
+ } else {
+ /* No full SVM mode, only core drm_gpusvm_pages API. */
+ if (ops || num_chunks || mm_range || notifier_size ||
+ device_private_page_owner)
+ return -EINVAL;
+ }
gpusvm->name = name;
gpusvm->drm = drm;
@@ -397,7 +411,6 @@ int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
gpusvm->chunk_sizes = chunk_sizes;
gpusvm->num_chunks = num_chunks;
- mmgrab(mm);
gpusvm->root = RB_ROOT_CACHED;
INIT_LIST_HEAD(&gpusvm->notifier_list);
@@ -489,7 +502,8 @@ void drm_gpusvm_fini(struct drm_gpusvm *gpusvm)
drm_gpusvm_range_remove(gpusvm, range);
}
- mmdrop(gpusvm->mm);
+ if (gpusvm->mm)
+ mmdrop(gpusvm->mm);
WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root));
}
EXPORT_SYMBOL_GPL(drm_gpusvm_fini);
@@ -629,13 +643,42 @@ drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
INIT_LIST_HEAD(&range->entry);
- range->notifier_seq = LONG_MAX;
- range->flags.migrate_devmem = migrate_devmem ? 1 : 0;
+ range->pages.notifier_seq = LONG_MAX;
+ range->pages.flags.migrate_devmem = migrate_devmem ? 1 : 0;
return range;
}
/**
+ * drm_gpusvm_hmm_pfn_to_order() - Get the largest CPU mapping order.
+ * @hmm_pfn: The current hmm_pfn.
+ * @hmm_pfn_index: Index of the @hmm_pfn within the pfn array.
+ * @npages: Number of pages within the pfn array, i.e. the hmm range size.
+ *
+ * To allow skipping PFNs with the same flags (like when they belong to
+ * the same huge PTE) when looping over the pfn array, take a given hmm_pfn,
+ * and return the largest order that will fit inside the CPU PTE, but also
+ * crucially accounting for the original hmm range boundaries.
+ *
+ * Return: The largest order that will safely fit within the size of the hmm_pfn
+ * CPU PTE.
+ */
+static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn,
+ unsigned long hmm_pfn_index,
+ unsigned long npages)
+{
+ unsigned long size;
+
+ size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
+ size -= (hmm_pfn & ~HMM_PFN_FLAGS) & (size - 1);
+ hmm_pfn_index += size;
+ if (hmm_pfn_index > npages)
+ size -= (hmm_pfn_index - npages);
+
+ return ilog2(size);
+}
+
+/**
* drm_gpusvm_check_pages() - Check pages
* @gpusvm: Pointer to the GPU SVM structure
* @notifier: Pointer to the GPU SVM notifier structure
@@ -693,7 +736,7 @@ static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
err = -EFAULT;
goto err_free;
}
- i += 0x1 << hmm_pfn_to_map_order(pfns[i]);
+ i += 0x1 << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
}
err_free:
@@ -951,31 +994,31 @@ err_mmunlock:
EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert);
/**
- * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal)
+ * __drm_gpusvm_unmap_pages() - Unmap pages associated with GPU SVM pages (internal)
* @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
* @npages: Number of pages to unmap
*
- * This function unmap pages associated with a GPU SVM range. Assumes and
+ * This function unmaps pages associated with a GPU SVM pages struct. Assumes and
* asserts correct locking is in place when called.
*/
-static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
- struct drm_gpusvm_range *range,
- unsigned long npages)
+static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ unsigned long npages)
{
- unsigned long i, j;
- struct drm_pagemap *dpagemap = range->dpagemap;
+ struct drm_pagemap *dpagemap = svm_pages->dpagemap;
struct device *dev = gpusvm->drm->dev;
+ unsigned long i, j;
lockdep_assert_held(&gpusvm->notifier_lock);
- if (range->flags.has_dma_mapping) {
- struct drm_gpusvm_range_flags flags = {
- .__flags = range->flags.__flags,
+ if (svm_pages->flags.has_dma_mapping) {
+ struct drm_gpusvm_pages_flags flags = {
+ .__flags = svm_pages->flags.__flags,
};
for (i = 0, j = 0; i < npages; j++) {
- struct drm_pagemap_addr *addr = &range->dma_addr[j];
+ struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j];
if (addr->proto == DRM_INTERCONNECT_SYSTEM)
dma_unmap_page(dev,
@@ -991,31 +1034,52 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
flags.has_devmem_pages = false;
flags.has_dma_mapping = false;
- WRITE_ONCE(range->flags.__flags, flags.__flags);
+ WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
- range->dpagemap = NULL;
+ svm_pages->dpagemap = NULL;
}
}
/**
- * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range
+ * __drm_gpusvm_free_pages() - Free dma array associated with GPU SVM pages
* @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
*
* This function frees the dma address array associated with a GPU SVM range.
*/
-static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm,
- struct drm_gpusvm_range *range)
+static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages)
{
lockdep_assert_held(&gpusvm->notifier_lock);
- if (range->dma_addr) {
- kvfree(range->dma_addr);
- range->dma_addr = NULL;
+ if (svm_pages->dma_addr) {
+ kvfree(svm_pages->dma_addr);
+ svm_pages->dma_addr = NULL;
}
}
/**
+ * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages
+ * struct
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
+ * @npages: Number of mapped pages
+ *
+ * This function unmaps and frees the dma address array associated with a GPU
+ * SVM pages struct.
+ */
+void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ unsigned long npages)
+{
+ drm_gpusvm_notifier_lock(gpusvm);
+ __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
+ __drm_gpusvm_free_pages(gpusvm, svm_pages);
+ drm_gpusvm_notifier_unlock(gpusvm);
+}
+EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages);
+
+/**
* drm_gpusvm_range_remove() - Remove GPU SVM range
* @gpusvm: Pointer to the GPU SVM structure
* @range: Pointer to the GPU SVM range to be removed
@@ -1040,8 +1104,8 @@ void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
return;
drm_gpusvm_notifier_lock(gpusvm);
- __drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
- drm_gpusvm_range_free_pages(gpusvm, range);
+ __drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages);
+ __drm_gpusvm_free_pages(gpusvm, &range->pages);
__drm_gpusvm_range_remove(notifier, range);
drm_gpusvm_notifier_unlock(gpusvm);
@@ -1107,6 +1171,28 @@ void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);
/**
+ * drm_gpusvm_pages_valid() - GPU SVM range pages valid
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
+ *
+ * This function determines if the GPU SVM pages are valid. Expected to be
+ * called holding gpusvm->notifier_lock and as the last step before committing a
+ * GPU binding. This is akin to a notifier seqno check in the HMM documentation
+ * but due to wider notifiers (i.e., notifiers which span multiple ranges) this
+ * function is required for finer grained checking (i.e., per range) if pages
+ * are valid.
+ *
+ * Return: True if GPU SVM range has valid pages, False otherwise
+ */
+static bool drm_gpusvm_pages_valid(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages)
+{
+ lockdep_assert_held(&gpusvm->notifier_lock);
+
+ return svm_pages->flags.has_devmem_pages || svm_pages->flags.has_dma_mapping;
+}
+
+/**
* drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
* @gpusvm: Pointer to the GPU SVM structure
* @range: Pointer to the GPU SVM range structure
@@ -1123,9 +1209,7 @@ EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);
bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
struct drm_gpusvm_range *range)
{
- lockdep_assert_held(&gpusvm->notifier_lock);
-
- return range->flags.has_devmem_pages || range->flags.has_dma_mapping;
+ return drm_gpusvm_pages_valid(gpusvm, &range->pages);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
@@ -1139,66 +1223,71 @@ EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
*
* Return: True if GPU SVM range has valid pages, False otherwise
*/
-static bool
-drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
- struct drm_gpusvm_range *range)
+static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages)
{
bool pages_valid;
- if (!range->dma_addr)
+ if (!svm_pages->dma_addr)
return false;
drm_gpusvm_notifier_lock(gpusvm);
- pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range);
+ pages_valid = drm_gpusvm_pages_valid(gpusvm, svm_pages);
if (!pages_valid)
- drm_gpusvm_range_free_pages(gpusvm, range);
+ __drm_gpusvm_free_pages(gpusvm, svm_pages);
drm_gpusvm_notifier_unlock(gpusvm);
return pages_valid;
}
/**
- * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
+ * drm_gpusvm_get_pages() - Get pages and populate GPU SVM pages struct
* @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
+ * @svm_pages: The SVM pages to populate. This will contain the dma-addresses
+ * @mm: The mm corresponding to the CPU range
+ * @notifier: The corresponding notifier for the given CPU range
+ * @pages_start: Start CPU address for the pages
+ * @pages_end: End CPU address for the pages (exclusive)
* @ctx: GPU SVM context
*
- * This function gets pages for a GPU SVM range and ensures they are mapped for
- * DMA access.
+ * This function gets and maps pages for a CPU range and ensures they are
+ * mapped for DMA access.
*
* Return: 0 on success, negative error code on failure.
*/
-int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
- struct drm_gpusvm_range *range,
- const struct drm_gpusvm_ctx *ctx)
+int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ struct mm_struct *mm,
+ struct mmu_interval_notifier *notifier,
+ unsigned long pages_start, unsigned long pages_end,
+ const struct drm_gpusvm_ctx *ctx)
{
- struct mmu_interval_notifier *notifier = &range->notifier->notifier;
struct hmm_range hmm_range = {
.default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 :
HMM_PFN_REQ_WRITE),
.notifier = notifier,
- .start = drm_gpusvm_range_start(range),
- .end = drm_gpusvm_range_end(range),
+ .start = pages_start,
+ .end = pages_end,
.dev_private_owner = gpusvm->device_private_page_owner,
};
- struct mm_struct *mm = gpusvm->mm;
void *zdd;
unsigned long timeout =
jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
unsigned long i, j;
- unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
- drm_gpusvm_range_end(range));
+ unsigned long npages = npages_in_range(pages_start, pages_end);
unsigned long num_dma_mapped;
unsigned int order = 0;
unsigned long *pfns;
int err = 0;
struct dev_pagemap *pagemap;
struct drm_pagemap *dpagemap;
- struct drm_gpusvm_range_flags flags;
+ struct drm_gpusvm_pages_flags flags;
+ enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE :
+ DMA_BIDIRECTIONAL;
retry:
hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
- if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range))
+ if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages))
goto set_seqno;
pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
@@ -1238,7 +1327,7 @@ map_pages:
*/
drm_gpusvm_notifier_lock(gpusvm);
- flags.__flags = range->flags.__flags;
+ flags.__flags = svm_pages->flags.__flags;
if (flags.unmapped) {
drm_gpusvm_notifier_unlock(gpusvm);
err = -EFAULT;
@@ -1251,13 +1340,12 @@ map_pages:
goto retry;
}
- if (!range->dma_addr) {
+ if (!svm_pages->dma_addr) {
/* Unlock and restart mapping to allocate memory. */
drm_gpusvm_notifier_unlock(gpusvm);
- range->dma_addr = kvmalloc_array(npages,
- sizeof(*range->dma_addr),
- GFP_KERNEL);
- if (!range->dma_addr) {
+ svm_pages->dma_addr =
+ kvmalloc_array(npages, sizeof(*svm_pages->dma_addr), GFP_KERNEL);
+ if (!svm_pages->dma_addr) {
err = -ENOMEM;
goto err_free;
}
@@ -1270,7 +1358,7 @@ map_pages:
for (i = 0, j = 0; i < npages; ++j) {
struct page *page = hmm_pfn_to_page(pfns[i]);
- order = hmm_pfn_to_map_order(pfns[i]);
+ order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
if (is_device_private_page(page) ||
is_device_coherent_page(page)) {
if (zdd != page->zone_device_data && i > 0) {
@@ -1296,13 +1384,13 @@ map_pages:
goto err_unmap;
}
}
- range->dma_addr[j] =
+ svm_pages->dma_addr[j] =
dpagemap->ops->device_map(dpagemap,
gpusvm->drm->dev,
page, order,
- DMA_BIDIRECTIONAL);
+ dma_dir);
if (dma_mapping_error(gpusvm->drm->dev,
- range->dma_addr[j].addr)) {
+ svm_pages->dma_addr[j].addr)) {
err = -EFAULT;
goto err_unmap;
}
@@ -1322,15 +1410,15 @@ map_pages:
addr = dma_map_page(gpusvm->drm->dev,
page, 0,
PAGE_SIZE << order,
- DMA_BIDIRECTIONAL);
+ dma_dir);
if (dma_mapping_error(gpusvm->drm->dev, addr)) {
err = -EFAULT;
goto err_unmap;
}
- range->dma_addr[j] = drm_pagemap_addr_encode
+ svm_pages->dma_addr[j] = drm_pagemap_addr_encode
(addr, DRM_INTERCONNECT_SYSTEM, order,
- DMA_BIDIRECTIONAL);
+ dma_dir);
}
i += 1 << order;
num_dma_mapped = i;
@@ -1339,21 +1427,21 @@ map_pages:
if (pagemap) {
flags.has_devmem_pages = true;
- range->dpagemap = dpagemap;
+ svm_pages->dpagemap = dpagemap;
}
/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
- WRITE_ONCE(range->flags.__flags, flags.__flags);
+ WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
drm_gpusvm_notifier_unlock(gpusvm);
kvfree(pfns);
set_seqno:
- range->notifier_seq = hmm_range.notifier_seq;
+ svm_pages->notifier_seq = hmm_range.notifier_seq;
return 0;
err_unmap:
- __drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped);
+ __drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped);
drm_gpusvm_notifier_unlock(gpusvm);
err_free:
kvfree(pfns);
@@ -1361,11 +1449,62 @@ err_free:
goto retry;
return err;
}
+EXPORT_SYMBOL_GPL(drm_gpusvm_get_pages);
+
+/**
+ * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @range: Pointer to the GPU SVM range structure
+ * @ctx: GPU SVM context
+ *
+ * This function gets pages for a GPU SVM range and ensures they are mapped for
+ * DMA access.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_range *range,
+ const struct drm_gpusvm_ctx *ctx)
+{
+ return drm_gpusvm_get_pages(gpusvm, &range->pages, gpusvm->mm,
+ &range->notifier->notifier,
+ drm_gpusvm_range_start(range),
+ drm_gpusvm_range_end(range), ctx);
+}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages);
/**
+ * drm_gpusvm_unmap_pages() - Unmap GPU svm pages
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
+ * @npages: Number of pages in @svm_pages.
+ * @ctx: GPU SVM context
+ *
+ * This function unmaps pages associated with a GPU SVM pages struct. If
+ * @ctx->in_notifier is set, it is assumed that gpusvm->notifier_lock is held in
+ * write mode; if it is clear, it acquires gpusvm->notifier_lock in read mode.
+ * Must be called in the invalidate() callback of the corresponding notifier for
+ * IOMMU security model.
+ */
+void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ unsigned long npages,
+ const struct drm_gpusvm_ctx *ctx)
+{
+ if (ctx->in_notifier)
+ lockdep_assert_held_write(&gpusvm->notifier_lock);
+ else
+ drm_gpusvm_notifier_lock(gpusvm);
+
+ __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
+
+ if (!ctx->in_notifier)
+ drm_gpusvm_notifier_unlock(gpusvm);
+}
+EXPORT_SYMBOL_GPL(drm_gpusvm_unmap_pages);
+
+/**
* drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
- * drm_gpusvm_range_evict() - Evict GPU SVM range
* @gpusvm: Pointer to the GPU SVM structure
* @range: Pointer to the GPU SVM range structure
* @ctx: GPU SVM context
@@ -1383,15 +1522,7 @@ void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
drm_gpusvm_range_end(range));
- if (ctx->in_notifier)
- lockdep_assert_held_write(&gpusvm->notifier_lock);
- else
- drm_gpusvm_notifier_lock(gpusvm);
-
- __drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
-
- if (!ctx->in_notifier)
- drm_gpusvm_notifier_unlock(gpusvm);
+ return drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages, ctx);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);
@@ -1489,10 +1620,10 @@ void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
{
lockdep_assert_held_write(&range->gpusvm->notifier_lock);
- range->flags.unmapped = true;
+ range->pages.flags.unmapped = true;
if (drm_gpusvm_range_start(range) < mmu_range->start ||
drm_gpusvm_range_end(range) > mmu_range->end)
- range->flags.partial_unmap = true;
+ range->pages.flags.partial_unmap = true;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 714d5702dfd7..7219f6b884b6 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -40,12 +40,12 @@ config DRM_XE
select DRM_TTM
select DRM_TTM_HELPER
select DRM_EXEC
+ select DRM_GPUSVM if !UML && DEVICE_PRIVATE
select DRM_GPUVM
select DRM_SCHED
select MMU_NOTIFIER
select WANT_DEV_COREDUMP
select AUXILIARY_BUS
- select HMM_MIRROR
select REGMAP if I2C
help
Driver for Intel Xe2 series GPUs and later. Experimental support
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 01735c6ece8b..87902b4bd6d3 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -104,6 +104,7 @@ config DRM_XE_DEBUG_GUC
config DRM_XE_USERPTR_INVAL_INJECT
bool "Inject userptr invalidation -EINVAL errors"
+ depends on DRM_GPUSVM
default n
help
Choose this option when debugging error paths that
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index bb135f457a6c..d9c6cf0f189e 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -84,6 +84,7 @@ xe-y += xe_bb.o \
xe_hw_error.o \
xe_hw_fence.o \
xe_irq.o \
+ xe_late_bind_fw.o \
xe_lrc.o \
xe_migrate.o \
xe_mmio.o \
@@ -130,6 +131,7 @@ xe-y += xe_bb.o \
xe_tuning.o \
xe_uc.o \
xe_uc_fw.o \
+ xe_validation.o \
xe_vm.o \
xe_vm_madvise.o \
xe_vram.o \
@@ -140,8 +142,8 @@ xe-y += xe_bb.o \
xe_wopcm.o
xe-$(CONFIG_I2C) += xe_i2c.o
-xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o
xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o
+xe-$(CONFIG_DRM_GPUSVM) += xe_userptr.o
# graphics hardware monitoring (HWMON) support
xe-$(CONFIG_HWMON) += xe_hwmon.o
@@ -326,6 +328,7 @@ ifeq ($(CONFIG_DEBUG_FS),y)
xe_gt_stats.o \
xe_guc_debugfs.o \
xe_huc_debugfs.o \
+ xe_tile_debugfs.o \
xe_uc_debugfs.o
xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index 1baa969aaa7c..31090c69dfbe 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -155,6 +155,8 @@ enum xe_guc_action {
XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
+ XE_GUC_ACTION_TEST_G2G_SEND = 0xF001,
+ XE_GUC_ACTION_TEST_G2G_RECV = 0xF002,
XE_GUC_ACTION_LIMIT
};
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
index b28c8fa061f7..ce5c59517528 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -210,6 +210,11 @@ struct slpc_shared_data {
u8 reserved_mode_definition[4096];
} __packed;
+enum slpc_power_profile {
+ SLPC_POWER_PROFILE_BASE = 0x0,
+ SLPC_POWER_PROFILE_POWER_SAVING = 0x1
+};
+
/**
* DOC: SLPC H2G MESSAGE FORMAT
*
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
index 41d39d67817a..f097fc6d5127 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
@@ -8,6 +8,7 @@
#include "xe_ttm_stolen_mgr.h"
#include "xe_res_cursor.h"
+#include "xe_validation.h"
struct xe_bo;
@@ -21,7 +22,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
u32 start, u32 end)
{
struct xe_bo *bo;
- int err;
+ int err = 0;
u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN;
if (start < SZ_4K)
@@ -32,21 +33,13 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
start = ALIGN(start, align);
}
- bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
- NULL, size, start, end,
- ttm_bo_type_kernel, flags, 0);
+ bo = xe_bo_create_pin_range_novm(xe, xe_device_get_root_tile(xe),
+ size, start, end, ttm_bo_type_kernel, flags);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
bo = NULL;
return err;
}
- err = xe_bo_pin(bo);
- xe_bo_unlock_vm_held(bo);
-
- if (err) {
- xe_bo_put(fb->bo);
- bo = NULL;
- }
fb->bo = bo;
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
index 3dfab0c2b827..35a5b07eeba4 100644
--- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -20,11 +20,11 @@ struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size
obj = ERR_PTR(-ENODEV);
if (!IS_DGFX(xe) && !XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) {
- obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
- NULL, size,
- ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
- XE_BO_FLAG_STOLEN |
- XE_BO_FLAG_GGTT);
+ obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe),
+ size,
+ ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT, false);
if (!IS_ERR(obj))
drm_info(&xe->drm, "Allocated fbdev into stolen\n");
else
@@ -32,10 +32,10 @@ struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size
}
if (IS_ERR(obj)) {
- obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size,
- ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
- XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_GGTT);
+ obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size,
+ ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+ XE_BO_FLAG_GGTT, false);
}
if (IS_ERR(obj)) {
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index 9f941fc2e36b..58581d7aaae6 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -43,11 +43,11 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
return false;
/* Set scanout flag for WC mapping */
- obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
- NULL, PAGE_ALIGN(size),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
+ obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe),
+ PAGE_ALIGN(size),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+ XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT, false);
if (IS_ERR(obj)) {
kfree(vma);
return false;
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index f1f8b5ab53ef..1fd4a815e784 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -102,29 +102,29 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
XE_PAGE_SIZE);
if (IS_DGFX(xe))
- dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
- dpt_size, ~0ull,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM0 |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE,
- alignment);
+ dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM0 |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ alignment, false);
else
- dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
- dpt_size, ~0ull,
- ttm_bo_type_kernel,
- XE_BO_FLAG_STOLEN |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE,
- alignment);
+ dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ alignment, false);
if (IS_ERR(dpt))
- dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
- dpt_size, ~0ull,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE,
- alignment);
+ dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ alignment, false);
if (IS_ERR(dpt))
return PTR_ERR(dpt);
@@ -281,7 +281,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
struct drm_gem_object *obj = intel_fb_bo(&fb->base);
struct xe_bo *bo = gem_to_xe_bo(obj);
- int ret;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ int ret = 0;
if (!vma)
return ERR_PTR(-ENODEV);
@@ -308,17 +310,22 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
* Pin the framebuffer, we can't use xe_bo_(un)pin functions as the
* assumptions are incorrect for framebuffers
*/
- ret = ttm_bo_reserve(&bo->ttm, false, false, NULL);
- if (ret)
- goto err;
-
- if (IS_DGFX(xe))
- ret = xe_bo_migrate(bo, XE_PL_VRAM0);
- else
- ret = xe_bo_validate(bo, NULL, true);
- if (!ret)
- ttm_bo_pin(&bo->ttm);
- ttm_bo_unreserve(&bo->ttm);
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ break;
+
+ if (IS_DGFX(xe))
+ ret = xe_bo_migrate(bo, XE_PL_VRAM0, NULL, &exec);
+ else
+ ret = xe_bo_validate(bo, NULL, true, &exec);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ if (!ret)
+ ttm_bo_pin(&bo->ttm);
+ }
if (ret)
goto err;
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index 30f1073141fc..4ae847b628e2 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -72,10 +72,10 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe,
int ret = 0;
/* allocate object of two page for HDCP command memory and store it */
- bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), PAGE_SIZE * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT, false);
if (IS_ERR(bo)) {
drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n");
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index 826ac3d578b7..94f00def811b 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -140,8 +140,8 @@ initial_plane_bo(struct xe_device *xe,
page_size);
size -= base;
- bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base,
- ttm_bo_type_kernel, flags);
+ bo = xe_bo_create_pin_map_at_novm(xe, tile0, size, phys_base,
+ ttm_bo_type_kernel, flags, 0, false);
if (IS_ERR(bo)) {
drm_dbg(&xe->drm,
"Failed to create bo phys_base=%pa size %u with flags %x: %li\n",
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index f96b2e2b3064..06cb6b02ec64 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -522,6 +522,7 @@
#define TDL_CHICKEN XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED)
#define QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE REG_BIT(12)
+#define EUSTALL_PERF_SAMPLING_DISABLE REG_BIT(5)
#define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8)
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 1b101edb838b..b5eff383902c 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -40,7 +40,4 @@
#define INDIRECT_CTX_RING_START_UDW (0x08 + 1)
#define INDIRECT_CTX_RING_CTL (0x0a + 1)
-#define CTX_INDIRECT_CTX_OFFSET_MASK REG_GENMASK(15, 6)
-#define CTX_INDIRECT_CTX_OFFSET_DEFAULT REG_FIELD_PREP(CTX_INDIRECT_CTX_OFFSET_MASK, 0xd)
-
#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 7b40cc8be1c9..2294cf89f3e1 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -23,7 +23,7 @@
static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
bool clear, u64 get_val, u64 assign_val,
- struct kunit *test)
+ struct kunit *test, struct drm_exec *exec)
{
struct dma_fence *fence;
struct ttm_tt *ttm;
@@ -35,7 +35,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
u32 offset;
/* Move bo to VRAM if not already there. */
- ret = xe_bo_validate(bo, NULL, false);
+ ret = xe_bo_validate(bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate bo.\n");
return ret;
@@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
}
/* Evict to system. CCS data should be copied. */
- ret = xe_bo_evict(bo);
+ ret = xe_bo_evict(bo, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to evict bo.\n");
return ret;
@@ -132,14 +132,15 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
/* TODO: Sanity check */
unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
if (IS_DGFX(xe))
kunit_info(test, "Testing vram id %u\n", tile->id);
else
kunit_info(test, "Testing system memory\n");
- bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags);
+ bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
+ bo_flags, exec);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "Failed to create bo.\n");
return;
@@ -149,18 +150,18 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
- test);
+ test, exec);
if (ret)
goto out_unlock;
kunit_info(test, "Verifying that CCS data survives migration.\n");
ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
- 0xdeadbeefdeadbeefULL, test);
+ 0xdeadbeefdeadbeefULL, test, exec);
if (ret)
goto out_unlock;
kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
- ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);
+ ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test, exec);
out_unlock:
xe_bo_unlock(bo);
@@ -210,6 +211,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
struct xe_bo *bo, *external;
unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
struct xe_gt *__gt;
int err, i, id;
@@ -218,25 +220,25 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
for (i = 0; i < 2; ++i) {
xe_vm_lock(vm, false);
- bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
+ bo = xe_bo_create_user(xe, vm, 0x10000,
DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags);
+ bo_flags, exec);
xe_vm_unlock(vm);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "bo create err=%pe\n", bo);
break;
}
- external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
+ external = xe_bo_create_user(xe, NULL, 0x10000,
DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags);
+ bo_flags, NULL);
if (IS_ERR(external)) {
KUNIT_FAIL(test, "external bo create err=%pe\n", external);
goto cleanup_bo;
}
xe_bo_lock(external, false);
- err = xe_bo_pin_external(external, false);
+ err = xe_bo_pin_external(external, false, exec);
xe_bo_unlock(external);
if (err) {
KUNIT_FAIL(test, "external bo pin err=%pe\n",
@@ -294,7 +296,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
if (i) {
down_read(&vm->lock);
xe_vm_lock(vm, false);
- err = xe_bo_validate(bo, bo->vm, false);
+ err = xe_bo_validate(bo, bo->vm, false, exec);
xe_vm_unlock(vm);
up_read(&vm->lock);
if (err) {
@@ -303,7 +305,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
goto cleanup_all;
}
xe_bo_lock(external, false);
- err = xe_bo_validate(external, NULL, false);
+ err = xe_bo_validate(external, NULL, false, exec);
xe_bo_unlock(external);
if (err) {
KUNIT_FAIL(test, "external bo valid err=%pe\n",
@@ -495,9 +497,9 @@ static int shrink_test_run_device(struct xe_device *xe)
INIT_LIST_HEAD(&link->link);
/* We can create bos using WC caching here. But it is slower. */
- bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
+ bo = xe_bo_create_user(xe, NULL, XE_BO_SHRINK_SIZE,
DRM_XE_GEM_CPU_CACHING_WB,
- XE_BO_FLAG_SYSTEM);
+ XE_BO_FLAG_SYSTEM, NULL);
if (IS_ERR(bo)) {
if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 5baeab6b6fb7..a7e548a2bdfb 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -27,7 +27,8 @@ static bool is_dynamic(struct dma_buf_test_params *params)
}
static void check_residency(struct kunit *test, struct xe_bo *exported,
- struct xe_bo *imported, struct dma_buf *dmabuf)
+ struct xe_bo *imported, struct dma_buf *dmabuf,
+ struct drm_exec *exec)
{
struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
u32 mem_type;
@@ -62,7 +63,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
* importer is on a different device. If they're on the same device,
* the exporter and the importer should be the same bo.
*/
- ret = xe_bo_evict(exported);
+ ret = xe_bo_evict(exported, exec);
if (ret) {
if (ret != -EINTR && ret != -ERESTARTSYS)
KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n",
@@ -77,7 +78,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
}
/* Re-validate the importer. This should move also exporter in. */
- ret = xe_bo_validate(imported, NULL, false);
+ ret = xe_bo_validate(imported, NULL, false, exec);
if (ret) {
if (ret != -EINTR && ret != -ERESTARTSYS)
KUNIT_FAIL(test, "Validating importer failed with err=%d.\n",
@@ -113,8 +114,8 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
size = SZ_64K;
kunit_info(test, "running %s\n", __func__);
- bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
- params->mem_mask);
+ bo = xe_bo_create_user(xe, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
+ params->mem_mask, NULL);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(bo));
@@ -142,11 +143,12 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
KUNIT_FAIL(test,
"xe_gem_prime_import() succeeded when it shouldn't have\n");
} else {
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
int err;
/* Is everything where we expect it to be? */
xe_bo_lock(import_bo, false);
- err = xe_bo_validate(import_bo, NULL, false);
+ err = xe_bo_validate(import_bo, NULL, false, exec);
/* Pinning in VRAM is not allowed. */
if (!is_dynamic(params) &&
@@ -159,7 +161,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
err == -ERESTARTSYS);
if (!err)
- check_residency(test, bo, import_bo, dmabuf);
+ check_residency(test, bo, import_bo, dmabuf, exec);
xe_bo_unlock(import_bo);
}
drm_gem_object_put(import);
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
new file mode 100644
index 000000000000..3b213fcae916
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
@@ -0,0 +1,776 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/delay.h>
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_kunit_helpers.h"
+#include "tests/xe_pci_test.h"
+#include "tests/xe_test.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_pm.h"
+
+/*
+ * There are different ways to allocate the G2G buffers. The plan for this test
+ * is to make sure that all the possible options work. The particular option
+ * chosen by the driver may vary from one platform to another; it may also change
+ * with time. So to ensure consistency of testing, the relevant driver code is
+ * replicated here to guarantee it won't change without the test being updated
+ * to keep testing the other options.
+ *
+ * In order to test the actual code being used by the driver, there is also the
+ * 'default' scheme. That will use the official driver routines to test whatever
+ * method the driver is using on the current platform at the current time.
+ */
+enum {
+ /* Driver defined allocation scheme */
+ G2G_CTB_TYPE_DEFAULT,
+ /* Single buffer in host memory */
+ G2G_CTB_TYPE_HOST,
+ /* Single buffer in a specific tile, loops across all tiles */
+ G2G_CTB_TYPE_TILE,
+};
+
+/*
+ * Payload is opaque to GuC. So KMD can define any structure or size it wants.
+ */
+struct g2g_test_payload {
+ u32 tx_dev;
+ u32 tx_tile;
+ u32 rx_dev;
+ u32 rx_tile;
+ u32 seqno;
+};
+
+static void g2g_test_send(struct kunit *test, struct xe_guc *guc,
+ u32 far_tile, u32 far_dev,
+ struct g2g_test_payload *payload)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 *action, total;
+ size_t payload_len;
+ int ret;
+
+ static_assert(IS_ALIGNED(sizeof(*payload), sizeof(u32)));
+ payload_len = sizeof(*payload) / sizeof(u32);
+
+ total = 4 + payload_len;
+ action = kunit_kmalloc_array(test, total, sizeof(*action), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action);
+
+ action[0] = XE_GUC_ACTION_TEST_G2G_SEND;
+ action[1] = far_tile;
+ action[2] = far_dev;
+ action[3] = payload_len;
+ memcpy(action + 4, payload, payload_len * sizeof(u32));
+
+ atomic_inc(&xe->g2g_test_count);
+
+ /*
+ * Should specify the expected response notification here. Problem is that
+ * the response will be coming from a different GuC. By the end, it should
+ * all add up as long as an equal number of messages are sent from each GuC
+ * and to each GuC. However, in the middle negative reservation space errors
+ * and such like can occur. Rather than add intrusive changes to the CT layer
+ * it is simpler to just not bother counting it at all. The system should be
+ * idle when running the selftest, and the selftest's notification total size
+ * is well within the G2H allocation size. So there should be no issues with
+ * needing to block for space, which is all the tracking code is really for.
+ */
+ ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0);
+ kunit_kfree(test, action);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> %d:%d]\n", ret,
+ gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev);
+}
+
+/*
+ * NB: Can't use KUNIT_ASSERT and friends in here as this is called asynchronously
+ * from the G2H notification handler. Need that to actually complete rather than
+ * thread-abort in order to keep the rest of the driver alive!
+ */
+int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL;
+ u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len;
+ struct g2g_test_payload *payload;
+ size_t payload_len;
+ int ret = 0, i;
+
+ payload_len = sizeof(*payload) / sizeof(u32);
+
+ if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) {
+ xe_gt_err(rx_gt, "G2G test notification invalid length %u", len);
+ ret = -EPROTO;
+ goto done;
+ }
+
+ tx_tile = msg[0];
+ tx_dev = msg[1];
+ got_len = msg[2];
+ payload = (struct g2g_test_payload *)(msg + 3);
+
+ rx_tile = gt_to_tile(rx_gt)->id;
+ rx_dev = G2G_DEV(rx_gt);
+
+ if (got_len != payload_len) {
+ xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", got_len, payload_len);
+ ret = -EPROTO;
+ goto done;
+ }
+
+ if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile ||
+ payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) {
+ xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs %d:%d -> %d:%d! [%d]\n",
+ payload->tx_tile, payload->tx_dev, payload->rx_tile, payload->rx_dev,
+ tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno);
+ ret = -EPROTO;
+ goto done;
+ }
+
+ if (!xe->g2g_test_array) {
+ xe_gt_err(rx_gt, "G2G: Missing test array!\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ for_each_gt(test_gt, xe, i) {
+ if (gt_to_tile(test_gt)->id != tx_tile)
+ continue;
+
+ if (G2G_DEV(test_gt) != tx_dev)
+ continue;
+
+ if (tx_gt) {
+ xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d for %d:%d!\n",
+ tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ tx_gt = test_gt;
+ }
+ if (!tx_gt) {
+ xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", tx_tile, tx_dev);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id;
+
+ if (xe->g2g_test_array[idx] != payload->seqno - 1) {
+ xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n",
+ xe->g2g_test_array[idx], payload->seqno - 1,
+ tx_tile, tx_dev, rx_tile, rx_dev);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ xe->g2g_test_array[idx] = payload->seqno;
+
+done:
+ atomic_dec(&xe->g2g_test_count);
+ return ret;
+}
+
+/*
+ * Send the given seqno from all GuCs to all other GuCs in tile/GT order
+ */
+static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno)
+{
+ struct xe_gt *near_gt, *far_gt;
+ int i, j;
+
+ for_each_gt(near_gt, xe, i) {
+ u32 near_tile = gt_to_tile(near_gt)->id;
+ u32 near_dev = G2G_DEV(near_gt);
+
+ for_each_gt(far_gt, xe, j) {
+ u32 far_tile = gt_to_tile(far_gt)->id;
+ u32 far_dev = G2G_DEV(far_gt);
+ struct g2g_test_payload payload;
+
+ if (far_gt->info.id == near_gt->info.id)
+ continue;
+
+ payload.tx_dev = near_dev;
+ payload.tx_tile = near_tile;
+ payload.rx_dev = far_dev;
+ payload.rx_tile = far_tile;
+ payload.seqno = seqno;
+ g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload);
+ }
+ }
+}
+
+#define WAIT_TIME_MS 100
+#define WAIT_COUNT (1000 / WAIT_TIME_MS)
+
+static void g2g_wait_for_complete(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+ struct kunit *test = kunit_get_current_test();
+ int wait = 0;
+
+ /* Wait for all G2H messages to be received */
+ while (atomic_read(&xe->g2g_test_count)) {
+ if (++wait > WAIT_COUNT)
+ break;
+
+ msleep(WAIT_TIME_MS);
+ }
+
+ KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count),
+ "Timed out waiting for notifications\n");
+ kunit_info(test, "Got all notifications back\n");
+}
+
+#undef WAIT_TIME_MS
+#undef WAIT_COUNT
+
+static void g2g_clean_array(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+
+ xe->g2g_test_array = NULL;
+}
+
+#define NUM_LOOPS 16
+
+static void g2g_run_test(struct kunit *test, struct xe_device *xe)
+{
+ u32 seqno, max_array;
+ int ret, i, j;
+
+ max_array = xe->info.gt_count * xe->info.gt_count;
+ xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array);
+
+ ret = kunit_add_action_or_reset(test, g2g_clean_array, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n");
+
+ /*
+ * Send incrementing seqnos from all GuCs to all other GuCs in tile/GT order.
+ * Tile/GT order doesn't really mean anything to the hardware but it is going
+ * to be a fixed sequence every time.
+ *
+ * Verify that each one comes back having taken the correct route.
+ */
+ ret = kunit_add_action(test, g2g_wait_for_complete, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n");
+ for (seqno = 1; seqno < NUM_LOOPS; seqno++)
+ g2g_test_in_order(test, xe, seqno);
+ seqno--;
+
+ kunit_release_action(test, &g2g_wait_for_complete, xe);
+
+ /* Check for the final seqno in each slot */
+ for (i = 0; i < xe->info.gt_count; i++) {
+ for (j = 0; j < xe->info.gt_count; j++) {
+ u32 idx = (j * xe->info.gt_count) + i;
+
+ if (i == j)
+ KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx],
+ "identity seqno modified: %d for %dx%d!\n",
+ xe->g2g_test_array[idx], i, j);
+ else
+ KUNIT_ASSERT_EQ_MSG(test, seqno, xe->g2g_test_array[idx],
+ "invalid seqno: %d vs %d for %dx%d!\n",
+ xe->g2g_test_array[idx], seqno, i, j);
+ }
+ }
+
+ kunit_kfree(test, xe->g2g_test_array);
+ kunit_release_action(test, &g2g_clean_array, xe);
+
+ kunit_info(test, "Test passed\n");
+}
+
+#undef NUM_LOOPS
+
+static void g2g_ct_stop(struct xe_guc *guc)
+{
+ struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ int i, t;
+
+ for_each_gt(remote_gt, xe, i) {
+ u32 tile, dev;
+
+ if (remote_gt->info.id == gt->info.id)
+ continue;
+
+ tile = gt_to_tile(remote_gt)->id;
+ dev = G2G_DEV(remote_gt);
+
+ for (t = 0; t < XE_G2G_TYPE_LIMIT; t++)
+ guc_g2g_deregister(guc, tile, dev, t);
+ }
+}
+
+/* Size of a single allocation that contains all G2G CTBs across all GTs */
+static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe)
+{
+ unsigned int count = xe->info.gt_count;
+ u32 num_channels = (count * (count - 1)) / 2;
+
+ kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 0x%08X [%d]\n",
+ count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, G2G_DESC_AREA_SIZE,
+ num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE,
+ num_channels * XE_G2G_TYPE_LIMIT);
+
+ return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
+}
+
+/*
+ * Use the driver's regular CTB allocation scheme.
+ */
+static void g2g_alloc_default(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int i;
+
+ kunit_info(test, "Default [tiles = %d, GTs = %d]\n",
+ xe->info.tile_count, xe->info.gt_count);
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+ int ret;
+
+ ret = guc_g2g_alloc(guc);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", ERR_PTR(ret));
+ continue;
+ }
+}
+
+static void g2g_distribute(struct kunit *test, struct xe_device *xe, struct xe_bo *bo)
+{
+ struct xe_gt *root_gt, *gt;
+ int i;
+
+ root_gt = xe_device_get_gt(xe, 0);
+ root_gt->uc.guc.g2g.bo = bo;
+ root_gt->uc.guc.g2g.owned = true;
+ kunit_info(test, "[%d.%d] Assigned 0x%p\n", gt_to_tile(root_gt)->id, root_gt->info.id, bo);
+
+ for_each_gt(gt, xe, i) {
+ if (gt->info.id != 0) {
+ gt->uc.guc.g2g.owned = false;
+ gt->uc.guc.g2g.bo = xe_bo_get(bo);
+ kunit_info(test, "[%d.%d] Pinned 0x%p\n",
+ gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo);
+ }
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo);
+ }
+}
+
+/*
+ * Allocate a single blob on the host and split between all G2G CTBs.
+ */
+static void g2g_alloc_host(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_bo *bo;
+ u32 g2g_size;
+
+ kunit_info(test, "Host [tiles = %d, GTs = %d]\n", xe->info.tile_count, xe->info.gt_count);
+
+ g2g_size = g2g_ctb_size(test, xe);
+ bo = xe_managed_bo_create_pin_map(xe, xe_device_get_root_tile(xe), g2g_size,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_ALL |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo);
+ kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo);
+
+ xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
+
+ g2g_distribute(test, xe, bo);
+}
+
+/*
+ * Allocate a single blob on the given tile and split between all G2G CTBs.
+ */
+static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, struct xe_tile *tile)
+{
+ struct xe_bo *bo;
+ u32 g2g_size;
+
+ KUNIT_ASSERT_TRUE(test, IS_DGFX(xe));
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile);
+
+ kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n",
+ tile->id, xe->info.tile_count, xe->info.gt_count);
+
+ g2g_size = g2g_ctb_size(test, xe);
+ bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_ALL |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo);
+ kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo);
+
+ xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
+
+ g2g_distribute(test, xe, bo);
+}
+
+static void g2g_free(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ struct xe_bo *bo;
+ int i;
+
+ for_each_gt(gt, xe, i) {
+ bo = gt->uc.guc.g2g.bo;
+ if (!bo)
+ continue;
+
+ if (gt->uc.guc.g2g.owned) {
+ xe_managed_bo_unpin_map_no_vm(bo);
+ kunit_info(test, "[%d.%d] Unmapped 0x%p\n",
+ gt_to_tile(gt)->id, gt->info.id, bo);
+ } else {
+ xe_bo_put(bo);
+ kunit_info(test, "[%d.%d] Unpinned 0x%p\n",
+ gt_to_tile(gt)->id, gt->info.id, bo);
+ }
+
+ gt->uc.guc.g2g.bo = NULL;
+ }
+}
+
+static void g2g_stop(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int i;
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ if (!guc->g2g.bo)
+ continue;
+
+ g2g_ct_stop(guc);
+ }
+
+ g2g_free(test, xe);
+}
+
+/*
+ * Generate a unique id for each bi-directional CTB for each pair of
+ * near and far tiles/devices. The id can then be used as an index into
+ * a single allocation that is sub-divided into multiple CTBs.
+ *
+ * For example, with two devices per tile and two tiles, the table should
+ * look like:
+ *           Far <tile>.<dev>
+ *         0.0   0.1   1.0   1.1
+ * N 0.0  --/-- 00/01 02/03 04/05
+ * e 0.1  01/00 --/-- 06/07 08/09
+ * a 1.0  03/02 07/06 --/-- 10/11
+ * r 1.1  05/04 09/08 11/10 --/--
+ *
+ * Where each entry is Rx/Tx channel id.
+ *
+ * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would
+ * be reading from channel #11 and writing to channel #10. Whereas,
+ * GuC #2 talking to GuC #3 would read from #10 and write to #11.
+ */
+static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev,
+ u32 type, u32 max_inst, bool have_dev)
+{
+ u32 near = near_tile, far = far_tile;
+ u32 idx = 0, x, y, direction;
+ int i;
+
+ if (have_dev) {
+ near = (near << 1) | near_dev;
+ far = (far << 1) | far_dev;
+ }
+
+ /* No need to send to one's self */
+ if (far == near)
+ return -1;
+
+ if (far > near) {
+ /* Top right table half */
+ x = far;
+ y = near;
+
+ /* T/R is 'forwards' direction */
+ direction = type;
+ } else {
+ /* Bottom left table half */
+ x = near;
+ y = far;
+
+ /* B/L is 'backwards' direction */
+ direction = (1 - type);
+ }
+
+ /* Count the rows prior to the target */
+ for (i = y; i > 0; i--)
+ idx += max_inst - i;
+
+ /* Count this row up to the target */
+ idx += (x - 1 - y);
+
+ /* Slots are in Rx/Tx pairs */
+ idx *= 2;
+
+ /* Pick Rx/Tx direction */
+ idx += direction;
+
+ return idx;
+}
+
+static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type, bool have_dev)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 near_tile = gt_to_tile(gt)->id;
+ u32 near_dev = G2G_DEV(gt);
+ u32 max = xe->info.gt_count;
+ int idx;
+ u32 base, desc, buf;
+
+ if (!guc->g2g.bo)
+ return -ENODEV;
+
+ idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, type, max, have_dev);
+ xe_assert(xe, idx >= 0);
+
+ base = guc_bo_ggtt_addr(guc, guc->g2g.bo);
+ desc = base + idx * G2G_DESC_SIZE;
+ buf = base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
+
+ xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE);
+ xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(guc->g2g.bo));
+
+ return guc_action_register_g2g_buffer(guc, type, far_tile, far_dev,
+ desc, buf, G2G_BUFFER_SIZE);
+}
+
+static void g2g_start(struct kunit *test, struct xe_guc *guc)
+{
+ struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int i;
+ int t, ret;
+ bool have_dev;
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo);
+
+ /* GuC interface will need extending if more GT device types are ever created. */
+ KUNIT_ASSERT_TRUE(test,
+ (gt->info.type == XE_GT_TYPE_MAIN) ||
+ (gt->info.type == XE_GT_TYPE_MEDIA));
+
+ /* Channel numbering depends on whether there are multiple GTs per tile */
+ have_dev = xe->info.gt_count > xe->info.tile_count;
+
+ for_each_gt(remote_gt, xe, i) {
+ u32 tile, dev;
+
+ if (remote_gt->info.id == gt->info.id)
+ continue;
+
+ tile = gt_to_tile(remote_gt)->id;
+ dev = G2G_DEV(remote_gt);
+
+ for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) {
+ ret = g2g_register_flat(guc, tile, dev, t, have_dev);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", ERR_PTR(ret));
+ }
+ }
+}
+
+static void g2g_reinit(struct kunit *test, struct xe_device *xe, int ctb_type, struct xe_tile *tile)
+{
+ struct xe_gt *gt;
+ int i, found = 0;
+
+ g2g_stop(test, xe);
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ KUNIT_ASSERT_NULL(test, guc->g2g.bo);
+ }
+
+ switch (ctb_type) {
+ case G2G_CTB_TYPE_DEFAULT:
+ g2g_alloc_default(test, xe);
+ break;
+
+ case G2G_CTB_TYPE_HOST:
+ g2g_alloc_host(test, xe);
+ break;
+
+ case G2G_CTB_TYPE_TILE:
+ g2g_alloc_tile(test, xe, tile);
+ break;
+
+ default:
+ KUNIT_ASSERT_TRUE(test, false);
+ }
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ if (!guc->g2g.bo)
+ continue;
+
+ if (ctb_type == G2G_CTB_TYPE_DEFAULT)
+ guc_g2g_start(guc);
+ else
+ g2g_start(test, guc);
+ found++;
+ }
+
+ KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels running: %d", found);
+
+ kunit_info(test, "Testing across %d GTs\n", found);
+}
+
+static void g2g_recreate_ctb(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+ struct kunit *test = kunit_get_current_test();
+
+ g2g_stop(test, xe);
+
+ if (xe_guc_g2g_wanted(xe))
+ g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL);
+}
+
+static void g2g_pm_runtime_put(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+
+ xe_pm_runtime_put(xe);
+}
+
+static void g2g_pm_runtime_get(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM action\n");
+}
+
+static void g2g_check_skip(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ struct xe_gt *gt;
+ int i;
+
+ if (IS_SRIOV_VF(xe))
+ kunit_skip(test, "not supported from a VF");
+
+ if (xe->info.gt_count <= 1)
+ kunit_skip(test, "not enough GTs");
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD)
+ kunit_skip(test,
+ "G2G test interface not available in production firmware builds\n");
+ }
+}
+
+/*
+ * Simple test that does not try to recreate the CTBs.
+ * Requires that the platform already enables G2G comms
+ * but has no risk of leaving the system in a broken state
+ * afterwards.
+ */
+static void xe_live_guc_g2g_kunit_default(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+
+ if (!xe_guc_g2g_wanted(xe))
+ kunit_skip(test, "G2G not enabled");
+
+ g2g_check_skip(test);
+
+ g2g_pm_runtime_get(test);
+
+ kunit_info(test, "Testing default CTBs\n");
+ g2g_run_test(test, xe);
+
+ kunit_release_action(test, &g2g_pm_runtime_put, xe);
+}
+
+/*
+ * More complex test that re-creates the CTBs in various locations to
+ * test access to each location from each GuC. Can be run even on
+ * systems that do not enable G2G by default. On the other hand,
+ * because it recreates the CTBs, if something goes wrong it could
+ * leave the system with broken G2G comms.
+ */
+static void xe_live_guc_g2g_kunit_allmem(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ int ret;
+
+ g2g_check_skip(test);
+
+ g2g_pm_runtime_get(test);
+
+ /* Make sure to leave the system as we found it */
+ ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB re-creation action\n");
+
+ kunit_info(test, "Testing CTB type 'default'...\n");
+ g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL);
+ g2g_run_test(test, xe);
+
+ kunit_info(test, "Testing CTB type 'host'...\n");
+ g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL);
+ g2g_run_test(test, xe);
+
+ if (IS_DGFX(xe)) {
+ struct xe_tile *tile;
+ int id;
+
+ for_each_tile(tile, xe, id) {
+ kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id);
+
+ g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile);
+ g2g_run_test(test, xe);
+ }
+ } else {
+ kunit_info(test, "Skipping local memory on integrated platform\n");
+ }
+
+ kunit_release_action(test, g2g_recreate_ctb, xe);
+ kunit_release_action(test, g2g_pm_runtime_put, xe);
+}
+
+static struct kunit_case xe_guc_g2g_tests[] = {
+ KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, xe_pci_live_device_gen_param),
+ {}
+};
+
+VISIBLE_IF_KUNIT
+struct kunit_suite xe_guc_g2g_test_suite = {
+ .name = "xe_guc_g2g",
+ .test_cases = xe_guc_g2g_tests,
+ .init = xe_kunit_helper_xe_device_live_test_init,
+};
+EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
index 81277c77016d..c55e46f1ae92 100644
--- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
+++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
@@ -10,12 +10,14 @@ extern struct kunit_suite xe_bo_shrink_test_suite;
extern struct kunit_suite xe_dma_buf_test_suite;
extern struct kunit_suite xe_migrate_test_suite;
extern struct kunit_suite xe_mocs_test_suite;
+extern struct kunit_suite xe_guc_g2g_test_suite;
kunit_test_suite(xe_bo_test_suite);
kunit_test_suite(xe_bo_shrink_test_suite);
kunit_test_suite(xe_dma_buf_test_suite);
kunit_test_suite(xe_migrate_test_suite);
kunit_test_suite(xe_mocs_test_suite);
+kunit_test_suite(xe_guc_g2g_test_suite);
MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index edd1e701aa1c..5904d658d1f2 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -70,7 +70,7 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
} } while (0)
static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
- struct kunit *test, u32 region)
+ struct kunit *test, u32 region, struct drm_exec *exec)
{
struct xe_device *xe = tile_to_xe(m->tile);
u64 retval, expected = 0;
@@ -84,14 +84,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
ttm_bo_type_kernel,
region |
XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED,
+ exec);
if (IS_ERR(remote)) {
KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n",
str, remote);
return;
}
- err = xe_bo_validate(remote, NULL, false);
+ err = xe_bo_validate(remote, NULL, false, exec);
if (err) {
KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n",
str, err);
@@ -161,13 +162,13 @@ out_unlock:
}
static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo,
- struct kunit *test)
+ struct drm_exec *exec, struct kunit *test)
{
- test_copy(m, bo, test, XE_BO_FLAG_SYSTEM);
+ test_copy(m, bo, test, XE_BO_FLAG_SYSTEM, exec);
}
static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
- struct kunit *test)
+ struct drm_exec *exec, struct kunit *test)
{
u32 region;
@@ -178,10 +179,11 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
region = XE_BO_FLAG_VRAM1;
else
region = XE_BO_FLAG_VRAM0;
- test_copy(m, bo, test, region);
+ test_copy(m, bo, test, region, exec);
}
-static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
+static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test,
+ struct drm_exec *exec)
{
struct xe_tile *tile = m->tile;
struct xe_device *xe = tile_to_xe(tile);
@@ -202,7 +204,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile));
+ XE_BO_FLAG_VRAM_IF_DGFX(tile),
+ exec);
if (IS_ERR(big)) {
KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
goto vunmap;
@@ -210,7 +213,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile));
+ XE_BO_FLAG_VRAM_IF_DGFX(tile),
+ exec);
if (IS_ERR(pt)) {
KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
PTR_ERR(pt));
@@ -220,7 +224,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
tiny = xe_bo_create_pin_map(xe, tile, m->q->vm,
2 * SZ_4K,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile));
+ XE_BO_FLAG_VRAM_IF_DGFX(tile),
+ exec);
if (IS_ERR(tiny)) {
KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n",
PTR_ERR(tiny));
@@ -290,10 +295,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
check(retval, expected, "Command clear small last value", test);
kunit_info(test, "Copying small buffer object to system\n");
- test_copy_sysmem(m, tiny, test);
+ test_copy_sysmem(m, tiny, exec, test);
if (xe->info.tile_count > 1) {
kunit_info(test, "Copying small buffer object to other vram\n");
- test_copy_vram(m, tiny, test);
+ test_copy_vram(m, tiny, exec, test);
}
/* Clear a big bo */
@@ -312,10 +317,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
check(retval, expected, "Command clear big last value", test);
kunit_info(test, "Copying big buffer object to system\n");
- test_copy_sysmem(m, big, test);
+ test_copy_sysmem(m, big, exec, test);
if (xe->info.tile_count > 1) {
kunit_info(test, "Copying big buffer object to other vram\n");
- test_copy_vram(m, big, test);
+ test_copy_vram(m, big, exec, test);
}
out:
@@ -343,10 +348,11 @@ static int migrate_test_run_device(struct xe_device *xe)
for_each_tile(tile, xe, id) {
struct xe_migrate *m = tile->migrate;
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
kunit_info(test, "Testing tile id %d.\n", id);
xe_vm_lock(m->q->vm, false);
- xe_migrate_sanity_test(m, test);
+ xe_migrate_sanity_test(m, test, exec);
xe_vm_unlock(m->q->vm);
}
@@ -490,7 +496,7 @@ err_sync:
static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct xe_bo *ccs_bo,
- struct kunit *test)
+ struct drm_exec *exec, struct kunit *test)
{
struct dma_fence *fence;
u64 expected, retval;
@@ -509,7 +515,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
dma_fence_put(fence);
kunit_info(test, "Evict vram buffer object\n");
- ret = xe_bo_evict(vram_bo);
+ ret = xe_bo_evict(vram_bo, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to evict bo.\n");
return;
@@ -538,7 +544,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
dma_fence_put(fence);
kunit_info(test, "Restore vram buffer object\n");
- ret = xe_bo_validate(vram_bo, NULL, false);
+ ret = xe_bo_validate(vram_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
return;
@@ -636,13 +642,14 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
{
struct xe_bo *sys_bo, *vram_bo = NULL, *ccs_bo = NULL;
unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+ struct drm_exec *exec;
long ret;
- sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ sys_bo = xe_bo_create_user(xe, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED, NULL);
if (IS_ERR(sys_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -650,8 +657,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
return;
}
+ exec = XE_VALIDATION_OPT_OUT;
xe_bo_lock(sys_bo, false);
- ret = xe_bo_validate(sys_bo, NULL, false);
+ ret = xe_bo_validate(sys_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
goto free_sysbo;
@@ -664,10 +672,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_unlock(sys_bo);
- ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ ccs_bo = xe_bo_create_user(xe, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED, NULL);
if (IS_ERR(ccs_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -676,7 +684,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_lock(ccs_bo, false);
- ret = xe_bo_validate(ccs_bo, NULL, false);
+ ret = xe_bo_validate(ccs_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
goto free_ccsbo;
@@ -689,10 +697,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_unlock(ccs_bo);
- vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ vram_bo = xe_bo_create_user(xe, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED, NULL);
if (IS_ERR(vram_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(vram_bo));
@@ -700,7 +708,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_lock(vram_bo, false);
- ret = xe_bo_validate(vram_bo, NULL, false);
+ ret = xe_bo_validate(vram_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
goto free_vrambo;
@@ -713,7 +721,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
test_clear(xe, tile, sys_bo, vram_bo, test);
- test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, test);
+ test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, exec, test);
xe_bo_unlock(vram_bo);
xe_bo_lock(vram_bo, false);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index db30c5156d0c..aa29ac759d5d 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -12,12 +12,219 @@
#include <kunit/test-bug.h>
#include <kunit/visibility.h>
+#define PLATFORM_CASE(platform__, graphics_step__) \
+ { \
+ .platform = XE_ ## platform__, \
+ .subplatform = XE_SUBPLATFORM_NONE, \
+ .step = { .graphics = STEP_ ## graphics_step__ } \
+ }
+
+#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \
+ { \
+ .platform = XE_ ## platform__, \
+ .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \
+ .step = { .graphics = STEP_ ## graphics_step__ } \
+ }
+
+#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \
+ media_verx100__, media_step__) \
+ { \
+ .platform = XE_ ## platform__, \
+ .subplatform = XE_SUBPLATFORM_NONE, \
+ .graphics_verx100 = graphics_verx100__, \
+ .media_verx100 = media_verx100__, \
+ .step = { .graphics = STEP_ ## graphics_step__, \
+ .media = STEP_ ## media_step__ } \
+ }
+
+static const struct xe_pci_fake_data cases[] = {
+ PLATFORM_CASE(TIGERLAKE, B0),
+ PLATFORM_CASE(DG1, A0),
+ PLATFORM_CASE(DG1, B0),
+ PLATFORM_CASE(ALDERLAKE_S, A0),
+ PLATFORM_CASE(ALDERLAKE_S, B0),
+ PLATFORM_CASE(ALDERLAKE_S, C0),
+ PLATFORM_CASE(ALDERLAKE_S, D0),
+ PLATFORM_CASE(ALDERLAKE_P, A0),
+ PLATFORM_CASE(ALDERLAKE_P, B0),
+ PLATFORM_CASE(ALDERLAKE_P, C0),
+ SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
+ SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
+ SUBPLATFORM_CASE(DG2, G10, C0),
+ SUBPLATFORM_CASE(DG2, G11, B1),
+ SUBPLATFORM_CASE(DG2, G12, A1),
+ GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
+ GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
+ GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0),
+ GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
+ GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
+ GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1),
+ GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0),
+};
+
+KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc);
+
+/**
+ * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters
+ * @prev: the pointer to the previous parameter to iterate from or NULL
+ * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
+ *
+ * This function prepares struct xe_pci_fake_data parameter.
+ *
+ * To be used only as a parameter generator function in &KUNIT_CASE_PARAM.
+ *
+ * Return: pointer to the next parameter or NULL if no more parameters
+ */
+const void *xe_pci_fake_data_gen_params(const void *prev, char *desc)
+{
+ return platform_gen_params(prev, desc);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_gen_params);
+
+static const struct xe_device_desc *lookup_desc(enum xe_platform p)
+{
+ const struct xe_device_desc *desc;
+ const struct pci_device_id *ids;
+
+ for (ids = pciidlist; ids->driver_data; ids++) {
+ desc = (const void *)ids->driver_data;
+ if (desc->platform == p)
+ return desc;
+ }
+ return NULL;
+}
+
+static const struct xe_subplatform_desc *lookup_sub_desc(enum xe_platform p, enum xe_subplatform s)
+{
+ const struct xe_device_desc *desc = lookup_desc(p);
+ const struct xe_subplatform_desc *spd;
+
+ if (desc && desc->subplatforms)
+ for (spd = desc->subplatforms; spd->subplatform; spd++)
+ if (spd->subplatform == s)
+ return spd;
+ return NULL;
+}
+
+static const char *lookup_platform_name(enum xe_platform p)
+{
+ const struct xe_device_desc *desc = lookup_desc(p);
+
+ return desc ? desc->platform_name : "INVALID";
+}
+
+static const char *__lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s)
+{
+ const struct xe_subplatform_desc *desc = lookup_sub_desc(p, s);
+
+ return desc ? desc->name : "INVALID";
+}
+
+static const char *lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s)
+{
+ return s == XE_SUBPLATFORM_NONE ? "" : __lookup_subplatform_name(p, s);
+}
+
+static const char *subplatform_prefix(enum xe_subplatform s)
+{
+ return s == XE_SUBPLATFORM_NONE ? "" : " ";
+}
+
+static const char *step_prefix(enum xe_step step)
+{
+ return step == STEP_NONE ? "" : " ";
+}
+
+static const char *step_name(enum xe_step step)
+{
+ return step == STEP_NONE ? "" : xe_step_name(step);
+}
+
+static const char *sriov_prefix(enum xe_sriov_mode mode)
+{
+ return mode <= XE_SRIOV_MODE_NONE ? "" : " ";
+}
+
+static const char *sriov_name(enum xe_sriov_mode mode)
+{
+ return mode <= XE_SRIOV_MODE_NONE ? "" : xe_sriov_mode_to_string(mode);
+}
+
+static const char *lookup_graphics_name(unsigned int verx100)
+{
+ const struct xe_ip *ip = find_graphics_ip(verx100);
+
+ return ip ? ip->name : "";
+}
+
+static const char *lookup_media_name(unsigned int verx100)
+{
+ const struct xe_ip *ip = find_media_ip(verx100);
+
+ return ip ? ip->name : "";
+}
+
+/**
+ * xe_pci_fake_data_desc - Describe struct xe_pci_fake_data parameter
+ * @param: the &struct xe_pci_fake_data parameter to describe
+ * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
+ *
+ * This function prepares a description of the struct xe_pci_fake_data parameter.
+ *
+ * It is tailored for use in parameterized KUnit tests where the parameter
+ * generator is based on arrays of struct xe_pci_fake_data.
+ */
+void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc)
+{
+ if (param->graphics_verx100 || param->media_verx100)
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s %u.%02u(%s)%s%s %u.%02u(%s)%s%s%s%s",
+ lookup_platform_name(param->platform),
+ subplatform_prefix(param->subplatform),
+ lookup_subplatform_name(param->platform, param->subplatform),
+ param->graphics_verx100 / 100, param->graphics_verx100 % 100,
+ lookup_graphics_name(param->graphics_verx100),
+ step_prefix(param->step.graphics), step_name(param->step.graphics),
+ param->media_verx100 / 100, param->media_verx100 % 100,
+ lookup_media_name(param->media_verx100),
+ step_prefix(param->step.media), step_name(param->step.media),
+ sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode));
+ else
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s%s%s%s%s",
+ lookup_platform_name(param->platform),
+ subplatform_prefix(param->subplatform),
+ lookup_subplatform_name(param->platform, param->subplatform),
+ step_prefix(param->step.graphics), step_name(param->step.graphics),
+ sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode));
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_desc);
+
static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc)
{
snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u.%02u %s",
param->verx100 / 100, param->verx100 % 100, param->name);
}
+/*
+ * Pre-GMDID Graphics and Media IPs definitions.
+ *
+ * Mimic the way GMDID IPs are declared so the same
+ * param generator can be used for both.
+ */
+static const struct xe_ip pre_gmdid_graphics_ips[] = {
+ graphics_ip_xelp,
+ graphics_ip_xelpp,
+ graphics_ip_xehpg,
+ graphics_ip_xehpc,
+};
+
+static const struct xe_ip pre_gmdid_media_ips[] = {
+ media_ip_xem,
+ media_ip_xehpm,
+};
+
+KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc);
+KUNIT_ARRAY_PARAM(pre_gmdid_media_ip, pre_gmdid_media_ips, xe_ip_kunit_desc);
+
KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc);
KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc);
@@ -46,6 +253,13 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc);
*/
const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc)
{
+ const void *next = pre_gmdid_graphics_ip_gen_params(prev, desc);
+
+ if (next)
+ return next;
+ if (is_insidevar(prev, pre_gmdid_graphics_ips))
+ prev = NULL;
+
return graphics_ip_gen_params(prev, desc);
}
EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param);
@@ -63,6 +277,13 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param);
*/
const void *xe_pci_media_ip_gen_param(const void *prev, char *desc)
{
+ const void *next = pre_gmdid_media_ip_gen_params(prev, desc);
+
+ if (next)
+ return next;
+ if (is_insidevar(prev, pre_gmdid_media_ips))
+ prev = NULL;
+
return media_ip_gen_params(prev, desc);
}
EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param);
@@ -94,10 +315,10 @@ static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type,
if (type == GMDID_MEDIA) {
*ver = data->media_verx100;
- *revid = xe_step_to_gmdid(data->media_step);
+ *revid = xe_step_to_gmdid(data->step.media);
} else {
*ver = data->graphics_verx100;
- *revid = xe_step_to_gmdid(data->graphics_step);
+ *revid = xe_step_to_gmdid(data->step.graphics);
}
}
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index ce4d2b86b778..5e9a7ffc747f 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -10,6 +10,7 @@
#include "xe_platform_types.h"
#include "xe_sriov_types.h"
+#include "xe_step_types.h"
struct xe_device;
@@ -17,13 +18,14 @@ struct xe_pci_fake_data {
enum xe_sriov_mode sriov_mode;
enum xe_platform platform;
enum xe_subplatform subplatform;
+ struct xe_step_info step;
u32 graphics_verx100;
u32 media_verx100;
- u32 graphics_step;
- u32 media_step;
};
int xe_pci_fake_device_init(struct xe_device *xe);
+const void *xe_pci_fake_data_gen_params(const void *prev, char *desc);
+void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc);
const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc);
const void *xe_pci_media_ip_gen_param(const void *prev, char *desc);
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index 416258c193f6..49d191043dfa 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -15,87 +15,10 @@
#include "xe_tuning.h"
#include "xe_wa.h"
-struct platform_test_case {
- const char *name;
- enum xe_platform platform;
- enum xe_subplatform subplatform;
- u32 graphics_verx100;
- u32 media_verx100;
- struct xe_step_info step;
-};
-
-#define PLATFORM_CASE(platform__, graphics_step__) \
- { \
- .name = #platform__ " (" #graphics_step__ ")", \
- .platform = XE_ ## platform__, \
- .subplatform = XE_SUBPLATFORM_NONE, \
- .step = { .graphics = STEP_ ## graphics_step__ } \
- }
-
-
-#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \
- { \
- .name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")", \
- .platform = XE_ ## platform__, \
- .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \
- .step = { .graphics = STEP_ ## graphics_step__ } \
- }
-
-#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \
- media_verx100__, media_step__) \
- { \
- .name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\
- .platform = XE_ ## platform__, \
- .subplatform = XE_SUBPLATFORM_NONE, \
- .graphics_verx100 = graphics_verx100__, \
- .media_verx100 = media_verx100__, \
- .step = { .graphics = STEP_ ## graphics_step__, \
- .media = STEP_ ## media_step__ } \
- }
-
-static const struct platform_test_case cases[] = {
- PLATFORM_CASE(TIGERLAKE, B0),
- PLATFORM_CASE(DG1, A0),
- PLATFORM_CASE(DG1, B0),
- PLATFORM_CASE(ALDERLAKE_S, A0),
- PLATFORM_CASE(ALDERLAKE_S, B0),
- PLATFORM_CASE(ALDERLAKE_S, C0),
- PLATFORM_CASE(ALDERLAKE_S, D0),
- PLATFORM_CASE(ALDERLAKE_P, A0),
- PLATFORM_CASE(ALDERLAKE_P, B0),
- PLATFORM_CASE(ALDERLAKE_P, C0),
- SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
- SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
- SUBPLATFORM_CASE(DG2, G10, C0),
- SUBPLATFORM_CASE(DG2, G11, B1),
- SUBPLATFORM_CASE(DG2, G12, A1),
- GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
- GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
- GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0),
- GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
- GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
- GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1),
- GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0),
-};
-
-static void platform_desc(const struct platform_test_case *t, char *desc)
-{
- strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
-}
-
-KUNIT_ARRAY_PARAM(platform, cases, platform_desc);
-
static int xe_wa_test_init(struct kunit *test)
{
- const struct platform_test_case *param = test->param_value;
- struct xe_pci_fake_data data = {
- .platform = param->platform,
- .subplatform = param->subplatform,
- .graphics_verx100 = param->graphics_verx100,
- .media_verx100 = param->media_verx100,
- .graphics_step = param->step.graphics,
- .media_step = param->step.media,
- };
+ const struct xe_pci_fake_data *param = test->param_value;
+ struct xe_pci_fake_data data = *param;
struct xe_device *xe;
struct device *dev;
int ret;
@@ -120,13 +43,6 @@ static int xe_wa_test_init(struct kunit *test)
return 0;
}
-static void xe_wa_test_exit(struct kunit *test)
-{
- struct xe_device *xe = test->priv;
-
- drm_kunit_helper_free_device(test, xe->drm.dev);
-}
-
static void xe_wa_gt(struct kunit *test)
{
struct xe_device *xe = test->priv;
@@ -144,14 +60,13 @@ static void xe_wa_gt(struct kunit *test)
}
static struct kunit_case xe_wa_tests[] = {
- KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params),
+ KUNIT_CASE_PARAM(xe_wa_gt, xe_pci_fake_data_gen_params),
{}
};
static struct kunit_suite xe_rtp_test_suite = {
.name = "xe_wa",
.init = xe_wa_test_init,
- .exit = xe_wa_test_exit,
.test_cases = xe_wa_tests,
};
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index feb6e013dc38..6d20229c11de 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -64,7 +64,7 @@ struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
{
struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
- struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = gt_to_xe(gt);
struct xe_sa_manager *bb_pool;
int err;
@@ -78,7 +78,7 @@ struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
* So, this extra DW acts as a guard here.
*/
- bb_pool = tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool;
+ bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1));
if (IS_ERR(bb->bo)) {
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index ebcd191034df..edc6e29f0cc1 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -974,11 +974,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
* CCS meta data is migrated from TT -> SMEM. So, let us detach the
* BBs from BO as it is no longer needed.
*/
- if (IS_VF_CCS_BB_VALID(xe, bo) && old_mem_type == XE_PL_TT &&
+ if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT &&
new_mem->mem_type == XE_PL_SYSTEM)
xe_sriov_vf_ccs_detach_bo(bo);
- if (IS_SRIOV_VF(xe) &&
+ if (IS_VF_CCS_READY(xe) &&
((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
(old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) &&
handle_system_ccs)
@@ -994,7 +994,7 @@ out:
if (timeout < 0)
ret = timeout;
- if (IS_VF_CCS_BB_VALID(xe, bo))
+ if (IS_VF_CCS_READY(xe))
xe_sriov_vf_ccs_detach_bo(bo);
xe_tt_unmap_sg(xe, ttm_bo->ttm);
@@ -1141,42 +1141,47 @@ out_unref:
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
{
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
struct xe_bo *backup;
int ret = 0;
- xe_bo_lock(bo, false);
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ xe_assert(xe, !ret);
+ xe_assert(xe, !bo->backup_obj);
- xe_assert(xe, !bo->backup_obj);
+ /*
+ * Since this is called from the PM notifier we might have raced with
+ * someone unpinning this between our dropping the pinned list lock
+ * and grabbing the above bo lock.
+ */
+ if (!xe_bo_is_pinned(bo))
+ break;
- /*
- * Since this is called from the PM notifier we might have raced with
- * someone unpinning this after we dropped the pinned list lock and
- * grabbing the above bo lock.
- */
- if (!xe_bo_is_pinned(bo))
- goto out_unlock_bo;
+ if (!xe_bo_is_vram(bo))
+ break;
- if (!xe_bo_is_vram(bo))
- goto out_unlock_bo;
+ if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
+ break;
- if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
- goto out_unlock_bo;
+ backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
+ DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED, &exec);
+ if (IS_ERR(backup)) {
+ drm_exec_retry_on_contention(&exec);
+ ret = PTR_ERR(backup);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ break;
+ }
- backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
- DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
- if (IS_ERR(backup)) {
- ret = PTR_ERR(backup);
- goto out_unlock_bo;
+ backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
+ ttm_bo_pin(&backup->ttm);
+ bo->backup_obj = backup;
}
- backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
- ttm_bo_pin(&backup->ttm);
- bo->backup_obj = backup;
-
-out_unlock_bo:
- xe_bo_unlock(bo);
return ret;
}
@@ -1202,57 +1207,12 @@ int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
return 0;
}
-/**
- * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
- * @bo: The buffer object to move.
- *
- * On successful completion, the object memory will be moved to system memory.
- *
- * This is needed to for special handling of pinned VRAM object during
- * suspend-resume.
- *
- * Return: 0 on success. Negative error code on failure.
- */
-int xe_bo_evict_pinned(struct xe_bo *bo)
+static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup)
{
- struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
- struct xe_bo *backup = bo->backup_obj;
- bool backup_created = false;
+ struct xe_device *xe = xe_bo_device(bo);
bool unmap = false;
int ret = 0;
- xe_bo_lock(bo, false);
-
- if (WARN_ON(!bo->ttm.resource)) {
- ret = -EINVAL;
- goto out_unlock_bo;
- }
-
- if (WARN_ON(!xe_bo_is_pinned(bo))) {
- ret = -EINVAL;
- goto out_unlock_bo;
- }
-
- if (!xe_bo_is_vram(bo))
- goto out_unlock_bo;
-
- if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
- goto out_unlock_bo;
-
- if (!backup) {
- backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv,
- NULL, xe_bo_size(bo),
- DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
- if (IS_ERR(backup)) {
- ret = PTR_ERR(backup);
- goto out_unlock_bo;
- }
- backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
- backup_created = true;
- }
-
if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
struct xe_migrate *migrate;
struct dma_fence *fence;
@@ -1262,14 +1222,11 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
else
migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
+ xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv);
ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
if (ret)
goto out_backup;
- ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
- if (ret)
- goto out_backup;
-
fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
backup->ttm.resource, false);
if (IS_ERR(fence)) {
@@ -1279,8 +1236,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
dma_resv_add_fence(bo->ttm.base.resv, fence,
DMA_RESV_USAGE_KERNEL);
- dma_resv_add_fence(backup->ttm.base.resv, fence,
- DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
} else {
ret = xe_bo_vmap(backup);
@@ -1290,7 +1245,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
if (iosys_map_is_null(&bo->vmap)) {
ret = xe_bo_vmap(bo);
if (ret)
- goto out_backup;
+ goto out_vunmap;
unmap = true;
}
@@ -1300,15 +1255,78 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
if (!bo->backup_obj)
bo->backup_obj = backup;
-
-out_backup:
+out_vunmap:
xe_bo_vunmap(backup);
- if (ret && backup_created)
- xe_bo_put(backup);
-out_unlock_bo:
+out_backup:
if (unmap)
xe_bo_vunmap(bo);
- xe_bo_unlock(bo);
+
+ return ret;
+}
+
+/**
+ * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved to system memory.
+ *
+ * This is needed for special handling of pinned VRAM objects during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict_pinned(struct xe_bo *bo)
+{
+ struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *backup = bo->backup_obj;
+ bool backup_created = false;
+ int ret = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ xe_assert(xe, !ret);
+
+ if (WARN_ON(!bo->ttm.resource)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (WARN_ON(!xe_bo_is_pinned(bo))) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (!xe_bo_is_vram(bo))
+ break;
+
+ if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
+ break;
+
+ if (!backup) {
+ backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL,
+ xe_bo_size(bo),
+ DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED, &exec);
+ if (IS_ERR(backup)) {
+ drm_exec_retry_on_contention(&exec);
+ ret = PTR_ERR(backup);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ break;
+ }
+ backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
+ backup_created = true;
+ }
+
+ ret = xe_bo_evict_pinned_copy(bo, backup);
+ }
+
+ if (ret && backup_created)
+ xe_bo_put(backup);
+
return ret;
}
@@ -1358,10 +1376,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
if (ret)
goto out_unlock_bo;
- ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
- if (ret)
- goto out_unlock_bo;
-
fence = xe_migrate_copy(migrate, backup, bo,
backup->ttm.resource, bo->ttm.resource,
false);
@@ -1372,8 +1386,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
dma_resv_add_fence(bo->ttm.base.resv, fence,
DMA_RESV_USAGE_KERNEL);
- dma_resv_add_fence(backup->ttm.base.resv, fence,
- DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
} else {
ret = xe_bo_vmap(backup);
@@ -1529,7 +1541,7 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
if (!xe_bo_is_xe_bo(ttm_bo))
return;
- if (IS_VF_CCS_BB_VALID(ttm_to_xe_device(ttm_bo->bdev), bo))
+ if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev)))
xe_sriov_vf_ccs_detach_bo(bo);
/*
@@ -1725,65 +1737,234 @@ static bool should_migrate_to_smem(struct xe_bo *bo)
bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
}
-static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
+/* Populate the bo if swapped out, or migrate if the access mode requires that. */
+static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
+ struct drm_exec *exec)
+{
+ struct ttm_buffer_object *tbo = &bo->ttm;
+ int err = 0;
+
+ if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) {
+ xe_assert(xe_bo_device(bo),
+ dma_resv_test_signaled(tbo->base.resv, DMA_RESV_USAGE_KERNEL) ||
+ (tbo->ttm && ttm_tt_is_populated(tbo->ttm)));
+ err = ttm_bo_populate(&bo->ttm, ctx);
+ } else if (should_migrate_to_smem(bo)) {
+ xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM);
+ err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec);
+ }
+
+ return err;
+}
+
+/* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. */
+static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo)
+{
+ vm_fault_t ret;
+
+ trace_xe_bo_cpu_fault(bo);
+
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
+ /*
+ * When TTM is actually called to insert PTEs, ensure no blocking conditions
+ * remain, in which case TTM may drop locks and return VM_FAULT_RETRY.
+ */
+ xe_assert(xe, ret != VM_FAULT_RETRY);
+
+ if (ret == VM_FAULT_NOPAGE &&
+ mem_type_is_vram(bo->ttm.resource->mem_type)) {
+ mutex_lock(&xe->mem_access.vram_userfault.lock);
+ if (list_empty(&bo->vram_userfault_link))
+ list_add(&bo->vram_userfault_link,
+ &xe->mem_access.vram_userfault.list);
+ mutex_unlock(&xe->mem_access.vram_userfault.lock);
+ }
+
+ return ret;
+}
+
+static vm_fault_t xe_err_to_fault_t(int err)
+{
+ switch (err) {
+ case 0:
+ case -EINTR:
+ case -ERESTARTSYS:
+ case -EAGAIN:
+ return VM_FAULT_NOPAGE;
+ case -ENOMEM:
+ case -ENOSPC:
+ return VM_FAULT_OOM;
+ default:
+ break;
+ }
+ return VM_FAULT_SIGBUS;
+}
+
+static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo)
+{
+ dma_resv_assert_held(tbo->base.resv);
+
+ return tbo->ttm &&
+ (tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) ==
+ TTM_TT_FLAG_EXTERNAL;
+}
+
+static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe,
+ struct xe_bo *bo, bool needs_rpm)
+{
+ struct ttm_buffer_object *tbo = &bo->ttm;
+ vm_fault_t ret = VM_FAULT_RETRY;
+ struct xe_validation_ctx ctx;
+ struct ttm_operation_ctx tctx = {
+ .interruptible = true,
+ .no_wait_gpu = true,
+ .gfp_retry_mayfail = true,
+
+ };
+ int err;
+
+ if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
+ return VM_FAULT_RETRY;
+
+ err = xe_validation_ctx_init(&ctx, &xe->val, NULL,
+ (struct xe_val_flags) {
+ .interruptible = true,
+ .no_block = true
+ });
+ if (err)
+ goto out_pm;
+
+ if (!dma_resv_trylock(tbo->base.resv))
+ goto out_validation;
+
+ if (xe_ttm_bo_is_imported(tbo)) {
+ ret = VM_FAULT_SIGBUS;
+ drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
+ goto out_unlock;
+ }
+
+ err = xe_bo_fault_migrate(bo, &tctx, NULL);
+ if (err) {
+ /* Return VM_FAULT_RETRY on these errors. */
+ if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY)
+ ret = xe_err_to_fault_t(err);
+ goto out_unlock;
+ }
+
+ if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL))
+ ret = __xe_bo_cpu_fault(vmf, xe, bo);
+
+out_unlock:
+ dma_resv_unlock(tbo->base.resv);
+out_validation:
+ xe_validation_ctx_fini(&ctx);
+out_pm:
+ if (needs_rpm)
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
struct drm_device *ddev = tbo->base.dev;
struct xe_device *xe = to_xe_device(ddev);
struct xe_bo *bo = ttm_to_xe_bo(tbo);
bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
+ bool retry_after_wait = false;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
vm_fault_t ret;
- int idx, r = 0;
+ int err = 0;
+ int idx;
- if (needs_rpm)
- xe_pm_runtime_get(xe);
+ if (!drm_dev_enter(&xe->drm, &idx))
+ return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
- ret = ttm_bo_vm_reserve(tbo, vmf);
- if (ret)
+ ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm);
+ if (ret != VM_FAULT_RETRY)
goto out;
- if (drm_dev_enter(ddev, &idx)) {
- trace_xe_bo_cpu_fault(bo);
+ if (fault_flag_allow_retry_first(vmf->flags)) {
+ if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+ goto out;
+ retry_after_wait = true;
+ xe_bo_get(bo);
+ mmap_read_unlock(vmf->vma->vm_mm);
+ } else {
+ ret = VM_FAULT_NOPAGE;
+ }
+
+ /*
+ * The fastpath failed and we were not required to return and retry immediately.
+ * We're now running in one of two modes:
+ *
+ * 1) retry_after_wait == true: The mmap_read_lock() is dropped, and we're trying
+ * to resolve blocking waits. But we can't resolve the fault since the
+ * mmap_read_lock() is dropped. After retrying the fault, the aim is that the fastpath
+ * should succeed. But it may fail since we drop the bo lock.
+ *
+ * 2) retry_after_wait == false: The fastpath failed, typically even after
+ * a retry. Do whatever's necessary to resolve the fault.
+ *
+ * This construct is recommended to avoid excessive waits under the mmap_lock.
+ */
+
+ if (needs_rpm)
+ xe_pm_runtime_get(xe);
- if (should_migrate_to_smem(bo)) {
- xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM);
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ err) {
+ struct ttm_operation_ctx tctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ .gfp_retry_mayfail = retry_after_wait,
+ };
+ long lerr;
+
+ err = drm_exec_lock_obj(&exec, &tbo->base);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ break;
- r = xe_bo_migrate(bo, XE_PL_TT);
- if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
- ret = VM_FAULT_NOPAGE;
- else if (r)
- ret = VM_FAULT_SIGBUS;
+ if (xe_ttm_bo_is_imported(tbo)) {
+ err = -EFAULT;
+ drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
+ break;
}
- if (!ret)
- ret = ttm_bo_vm_fault_reserved(vmf,
- vmf->vma->vm_page_prot,
- TTM_BO_VM_NUM_PREFAULT);
- drm_dev_exit(idx);
- if (ret == VM_FAULT_RETRY &&
- !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
- goto out;
+ err = xe_bo_fault_migrate(bo, &tctx, &exec);
+ if (err) {
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
- /*
- * ttm_bo_vm_reserve() already has dma_resv_lock.
- */
- if (ret == VM_FAULT_NOPAGE &&
- mem_type_is_vram(tbo->resource->mem_type)) {
- mutex_lock(&xe->mem_access.vram_userfault.lock);
- if (list_empty(&bo->vram_userfault_link))
- list_add(&bo->vram_userfault_link,
- &xe->mem_access.vram_userfault.list);
- mutex_unlock(&xe->mem_access.vram_userfault.lock);
+ lerr = dma_resv_wait_timeout(tbo->base.resv,
+ DMA_RESV_USAGE_KERNEL, true,
+ MAX_SCHEDULE_TIMEOUT);
+ if (lerr < 0) {
+ err = lerr;
+ break;
}
- } else {
- ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+
+ if (!retry_after_wait)
+ ret = __xe_bo_cpu_fault(vmf, xe, bo);
}
+ /* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. */
+ if (err && !retry_after_wait)
+ ret = xe_err_to_fault_t(err);
- dma_resv_unlock(tbo->base.resv);
-out:
if (needs_rpm)
xe_pm_runtime_put(xe);
+ if (retry_after_wait)
+ xe_bo_put(bo);
+out:
+ drm_dev_exit(idx);
+
return ret;
}
@@ -1827,7 +2008,7 @@ int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
}
static const struct vm_operations_struct xe_gem_vm_ops = {
- .fault = xe_gem_fault,
+ .fault = xe_bo_cpu_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
.access = xe_bo_vm_access,
@@ -1875,11 +2056,32 @@ void xe_bo_free(struct xe_bo *bo)
kfree(bo);
}
-struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
- struct xe_tile *tile, struct dma_resv *resv,
- struct ttm_lru_bulk_move *bulk, size_t size,
- u16 cpu_caching, enum ttm_bo_type type,
- u32 flags)
+/**
+ * xe_bo_init_locked() - Initialize or create an xe_bo.
+ * @xe: The xe device.
+ * @bo: An already allocated buffer object or NULL
+ * if the function should allocate a new one.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @resv: Pointer to a locked shared reservation object to use for this bo,
+ * or NULL for the xe_bo to use its own.
+ * @bulk: The bulk move to use for LRU bumping, or NULL for external bos.
+ * @size: The storage size to use for the bo.
+ * @cpu_caching: The cpu caching used for system memory backing store.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
+ *
+ * Initialize or create an xe buffer object. On failure, any allocated buffer
+ * object passed in @bo will have been unreferenced.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
+struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
+ struct xe_tile *tile, struct dma_resv *resv,
+ struct ttm_lru_bulk_move *bulk, size_t size,
+ u16 cpu_caching, enum ttm_bo_type type,
+ u32 flags, struct drm_exec *exec)
{
struct ttm_operation_ctx ctx = {
.interruptible = true,
@@ -1948,6 +2150,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
ctx.resv = resv;
}
+ xe_validation_assert_exec(xe, exec, &bo->ttm.base);
if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
if (WARN_ON(err)) {
@@ -2049,7 +2252,7 @@ __xe_bo_create_locked(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
u16 cpu_caching, enum ttm_bo_type type, u32 flags,
- u64 alignment)
+ u64 alignment, struct drm_exec *exec)
{
struct xe_bo *bo = NULL;
int err;
@@ -2070,11 +2273,11 @@ __xe_bo_create_locked(struct xe_device *xe,
}
}
- bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
- vm && !xe_vm_in_fault_mode(vm) &&
- flags & XE_BO_FLAG_USER ?
- &vm->lru_bulk_move : NULL, size,
- cpu_caching, type, flags);
+ bo = xe_bo_init_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
+ vm && !xe_vm_in_fault_mode(vm) &&
+ flags & XE_BO_FLAG_USER ?
+ &vm->lru_bulk_move : NULL, size,
+ cpu_caching, type, flags, exec);
if (IS_ERR(bo))
return bo;
@@ -2108,9 +2311,10 @@ __xe_bo_create_locked(struct xe_device *xe,
if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
- start + xe_bo_size(bo), U64_MAX);
+ start + xe_bo_size(bo), U64_MAX,
+ exec);
} else {
- err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
+ err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec);
}
if (err)
goto err_unlock_put_bo;
@@ -2127,82 +2331,166 @@ err_unlock_put_bo:
return ERR_PTR(err);
}
-struct xe_bo *
-xe_bo_create_locked_range(struct xe_device *xe,
- struct xe_tile *tile, struct xe_vm *vm,
- size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags, u64 alignment)
-{
- return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
- flags, alignment);
-}
-
+/**
+ * xe_bo_create_locked() - Create a BO
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @vm: The local vm or NULL for external objects.
+ * @size: The storage size to use for the bo.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
+ *
+ * Create a locked xe BO with no range- nor alignment restrictions.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags)
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec)
{
return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
- flags, 0);
+ flags, 0, exec);
}
-struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- u16 cpu_caching,
- u32 flags)
+static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u16 cpu_caching,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment, bool intr)
{
- struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
- cpu_caching, ttm_bo_type_device,
- flags | XE_BO_FLAG_USER, 0);
- if (!IS_ERR(bo))
- xe_bo_unlock_vm_held(bo);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *bo;
+ int ret = 0;
- return bo;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
+ ret) {
+ bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL,
+ cpu_caching, type, flags, alignment, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ } else {
+ xe_bo_unlock(bo);
+ }
+ }
+
+ return ret ? ERR_PTR(ret) : bo;
}
-struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags)
+/**
+ * xe_bo_create_user() - Create a user BO
+ * @xe: The xe device.
+ * @vm: The local vm or NULL for external objects.
+ * @size: The storage size to use for the bo.
+ * @cpu_caching: The caching mode to be used for system backing store.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL
+ * if such a transaction should be initiated by the call.
+ *
+ * Create a bo on behalf of user-space.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
+struct xe_bo *xe_bo_create_user(struct xe_device *xe,
+ struct xe_vm *vm, size_t size,
+ u16 cpu_caching,
+ u32 flags, struct drm_exec *exec)
{
- struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
+ struct xe_bo *bo;
+
+ flags |= XE_BO_FLAG_USER;
- if (!IS_ERR(bo))
- xe_bo_unlock_vm_held(bo);
+ if (vm || exec) {
+ xe_assert(xe, exec);
+ bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL,
+ cpu_caching, ttm_bo_type_device,
+ flags, 0, exec);
+ if (!IS_ERR(bo))
+ xe_bo_unlock_vm_held(bo);
+ } else {
+ bo = xe_bo_create_novm(xe, NULL, size, cpu_caching,
+ ttm_bo_type_device, flags, 0, true);
+ }
return bo;
}
-struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm,
- size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags)
+/**
+ * xe_bo_create_pin_range_novm() - Create and pin a BO with range options.
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @size: The storage size to use for the bo.
+ * @start: Start of fixed VRAM range or 0.
+ * @end: End of fixed VRAM range or ~0ULL.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ *
+ * Create and pin an Xe BO with optional range restrictions. If @start and @end indicate
+ * a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM placement
+ * only.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
+struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 start, u64 end,
+ enum ttm_bo_type type, u32 flags)
{
- return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
- type, flags, 0);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *bo;
+ int err = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
+ bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end,
+ 0, type, flags, 0, &exec);
+ if (IS_ERR(bo)) {
+ drm_exec_retry_on_contention(&exec);
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+
+ err = xe_bo_pin(bo, &exec);
+ xe_bo_unlock(bo);
+ if (err) {
+ xe_bo_put(bo);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+ }
+
+ return err ? ERR_PTR(err) : bo;
}
-struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
- struct xe_tile *tile,
- struct xe_vm *vm,
- size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags,
- u64 alignment)
+static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct xe_vm *vm,
+ size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment, struct drm_exec *exec)
{
struct xe_bo *bo;
int err;
u64 start = offset == ~0ull ? 0 : offset;
- u64 end = offset == ~0ull ? offset : start + size;
+ u64 end = offset == ~0ull ? ~0ull : start + size;
if (flags & XE_BO_FLAG_STOLEN &&
xe_ttm_stolen_cpu_access_needs_ggtt(xe))
flags |= XE_BO_FLAG_GGTT;
- bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
- flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
- alignment);
+ bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
+ flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
+ alignment, exec);
if (IS_ERR(bo))
return bo;
- err = xe_bo_pin(bo);
+ err = xe_bo_pin(bo, exec);
if (err)
goto err_put;
@@ -2222,11 +2510,100 @@ err_put:
return ERR_PTR(err);
}
+/**
+ * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @size: The storage size to use for the bo.
+ * @offset: Optional VRAM offset or %~0ull for don't care.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @alignment: GGTT alignment.
+ * @intr: Whether to execute any waits for backing store interruptible.
+ *
+ * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment
+ * options. The bo will be external and not associated with a VM.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
+ * to true on entry.
+ */
+struct xe_bo *
+xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 offset, enum ttm_bo_type type, u32 flags,
+ u64 alignment, bool intr)
+{
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *bo;
+ int ret = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
+ ret) {
+ bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset,
+ type, flags, alignment, &exec);
+ if (IS_ERR(bo)) {
+ drm_exec_retry_on_contention(&exec);
+ ret = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ }
+ }
+
+ return ret ? ERR_PTR(ret) : bo;
+}
+
+/**
+ * xe_bo_create_pin_map() - Create pinned and mapped bo
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @vm: The vm to associate the buffer object with. The vm's resv must be locked
+ * with the transaction represented by @exec.
+ * @size: The storage size to use for the bo.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction, and
+ * previously used for locking @vm's resv.
+ *
+ * Create a pinned and mapped bo. If @vm is non-NULL the bo is local to @vm;
+ * otherwise it is external and not associated with a VM.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ * In particular, the function may return ERR_PTR(%-EINTR) if @exec was
+ * configured for interruptible locking.
+ */
struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags)
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec)
+{
+ return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags,
+ 0, exec);
+}
+
+/**
+ * xe_bo_create_pin_map_novm() - Create pinned and mapped bo
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @size: The storage size to use for the bo.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @intr: Whether to execute any waits for backing store interruptible.
+ *
+ * Create a pinned and mapped bo. The bo will be external and not associated
+ * with a VM.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
+ * to true on entry.
+ */
+struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, enum ttm_bo_type type, u32 flags,
+ bool intr)
{
- return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
+ return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr);
}
static void __xe_bo_unpin_map_no_vm(void *arg)
@@ -2241,8 +2618,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
int ret;
KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
-
- bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
+ bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true);
if (IS_ERR(bo))
return bo;
@@ -2253,6 +2629,11 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
return bo;
}
+void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo)
+{
+ devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo);
+}
+
struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
const void *data, size_t size, u32 flags)
{
@@ -2325,6 +2706,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
* xe_bo_pin_external - pin an external BO
* @bo: buffer object to be pinned
* @in_place: Pin in current placement, don't attempt to migrate.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
* BO. Unique call compared to xe_bo_pin as this function has it own set of
@@ -2332,7 +2714,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
*
* Returns 0 for success, negative error code otherwise.
*/
-int xe_bo_pin_external(struct xe_bo *bo, bool in_place)
+int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec)
{
struct xe_device *xe = xe_bo_device(bo);
int err;
@@ -2342,7 +2724,7 @@ int xe_bo_pin_external(struct xe_bo *bo, bool in_place)
if (!xe_bo_is_pinned(bo)) {
if (!in_place) {
- err = xe_bo_validate(bo, NULL, false);
+ err = xe_bo_validate(bo, NULL, false, exec);
if (err)
return err;
}
@@ -2365,7 +2747,17 @@ int xe_bo_pin_external(struct xe_bo *bo, bool in_place)
return 0;
}
-int xe_bo_pin(struct xe_bo *bo)
+/**
+ * xe_bo_pin() - Pin a kernel bo after potentially migrating it
+ * @bo: The kernel bo to pin.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
+ *
+ * Attempts to migrate a bo to @bo->placement. If that succeeds,
+ * pins the bo.
+ *
+ * Return: %0 on success, negative error code on migration failure.
+ */
+int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec)
{
struct ttm_place *place = &bo->placements[0];
struct xe_device *xe = xe_bo_device(bo);
@@ -2387,7 +2779,7 @@ int xe_bo_pin(struct xe_bo *bo)
/* We only expect at most 1 pin */
xe_assert(xe, !xe_bo_is_pinned(bo));
- err = xe_bo_validate(bo, NULL, false);
+ err = xe_bo_validate(bo, NULL, false, exec);
if (err)
return err;
@@ -2480,6 +2872,7 @@ void xe_bo_unpin(struct xe_bo *bo)
* NULL. Used together with @allow_res_evict.
* @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
* reservation object.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Make sure the bo is in allowed placement, migrating it if necessary. If
* needed, other bos will be evicted. If bos selected for eviction shares
@@ -2489,7 +2882,8 @@ void xe_bo_unpin(struct xe_bo *bo)
* Return: 0 on success, negative error code on failure. May return
* -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
*/
-int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict,
+ struct drm_exec *exec)
{
struct ttm_operation_ctx ctx = {
.interruptible = true,
@@ -2511,6 +2905,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
xe_vm_set_validating(vm, allow_res_evict);
trace_xe_bo_validate(bo);
+ xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
xe_vm_clear_validating(vm, allow_res_evict);
@@ -2706,8 +3101,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
struct xe_device *xe = to_xe_device(dev);
struct xe_file *xef = to_xe_file(file);
struct drm_xe_gem_create *args = data;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
struct xe_vm *vm = NULL;
- ktime_t end = 0;
struct xe_bo *bo;
unsigned int bo_flags;
u32 handle;
@@ -2781,25 +3177,26 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
}
-retry:
- if (vm) {
- err = xe_vm_lock(vm, true);
- if (err)
- goto out_vm;
+ err = 0;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ err) {
+ if (vm) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ break;
+ }
+ bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching,
+ bo_flags, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
}
-
- bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
- bo_flags);
-
- if (vm)
- xe_vm_unlock(vm);
-
- if (IS_ERR(bo)) {
- err = PTR_ERR(bo);
- if (xe_vm_validate_should_retry(NULL, err, &end))
- goto retry;
+ if (err)
goto out_vm;
- }
if (args->extensions) {
err = gem_create_user_extensions(xe, bo, args->extensions, 0);
@@ -2948,6 +3345,9 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
* xe_bo_migrate - Migrate an object to the desired region id
* @bo: The buffer object to migrate.
* @mem_type: The TTM region type to migrate to.
+ * @tctx: A pointer to a struct ttm_operation_ctx or NULL if
+ * a default interruptible ctx is to be used.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Attempt to migrate the buffer object to the desired memory region. The
* buffer object may not be pinned, and must be locked.
@@ -2959,7 +3359,8 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
* Return: 0 on success. Negative error code on failure. In particular may
* return -EINTR or -ERESTARTSYS if signal pending.
*/
-int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx,
+ struct drm_exec *exec)
{
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
struct ttm_operation_ctx ctx = {
@@ -2971,6 +3372,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
struct ttm_place requested;
xe_bo_assert_held(bo);
+ tctx = tctx ? tctx : &ctx;
if (bo->ttm.resource->mem_type == mem_type)
return 0;
@@ -2997,19 +3399,22 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
}
- return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+ if (!tctx->no_wait_gpu)
+ xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
+ return ttm_bo_validate(&bo->ttm, &placement, tctx);
}
/**
* xe_bo_evict - Evict an object to evict placement
* @bo: The buffer object to migrate.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* On successful completion, the object memory will be moved to evict
* placement. This function blocks until the object has been fully moved.
*
* Return: 0 on success. Negative error code on failure.
*/
-int xe_bo_evict(struct xe_bo *bo)
+int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec)
{
struct ttm_operation_ctx ctx = {
.interruptible = false,
@@ -3169,11 +3574,11 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
page_size);
- bo = xe_bo_create_user(xe, NULL, NULL, args->size,
+ bo = xe_bo_create_user(xe, NULL, args->size,
DRM_XE_GEM_CPU_CACHING_WC,
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
XE_BO_FLAG_SCANOUT |
- XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index cfb1ec266a6d..a77af42b5f9e 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -10,6 +10,7 @@
#include "xe_bo_types.h"
#include "xe_macros.h"
+#include "xe_validation.h"
#include "xe_vm_types.h"
#include "xe_vm.h"
#include "xe_vram_types.h"
@@ -88,40 +89,34 @@ struct sg_table;
struct xe_bo *xe_bo_alloc(void);
void xe_bo_free(struct xe_bo *bo);
-struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
- struct xe_tile *tile, struct dma_resv *resv,
- struct ttm_lru_bulk_move *bulk, size_t size,
- u16 cpu_caching, enum ttm_bo_type type,
- u32 flags);
-struct xe_bo *
-xe_bo_create_locked_range(struct xe_device *xe,
- struct xe_tile *tile, struct xe_vm *vm,
- size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags, u64 alignment);
+struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
+ struct xe_tile *tile, struct dma_resv *resv,
+ struct ttm_lru_bulk_move *bulk, size_t size,
+ u16 cpu_caching, enum ttm_bo_type type,
+ u32 flags, struct drm_exec *exec);
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- u16 cpu_caching,
- u32 flags);
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec);
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_vm *vm, size_t size,
+ u16 cpu_caching, u32 flags, struct drm_exec *exec);
struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
- struct xe_tile *tile,
- struct xe_vm *vm,
- size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags,
- u64 alignment);
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec);
+struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, enum ttm_bo_type type, u32 flags,
+ bool intr);
+struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 start, u64 end,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *
+xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 offset, enum ttm_bo_type type,
+ u32 flags, u64 alignment, bool intr);
struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
size_t size, u32 flags);
+void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo);
struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
const void *data, size_t size, u32 flags);
int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src);
@@ -200,11 +195,12 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
}
}
-int xe_bo_pin_external(struct xe_bo *bo, bool in_place);
-int xe_bo_pin(struct xe_bo *bo);
+int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec);
+int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec);
void xe_bo_unpin_external(struct xe_bo *bo);
void xe_bo_unpin(struct xe_bo *bo);
-int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict);
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict,
+ struct drm_exec *exec);
static inline bool xe_bo_is_pinned(struct xe_bo *bo)
{
@@ -285,8 +281,9 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res);
bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
-int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
-int xe_bo_evict(struct xe_bo *bo);
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *ctc,
+ struct drm_exec *exec);
+int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec);
int xe_bo_evict_pinned(struct xe_bo *bo);
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo);
@@ -315,6 +312,21 @@ static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
return PAGE_ALIGN(xe_bo_size(bo));
}
+/**
+ * xe_bo_has_valid_ccs_bb - Tell whether both CCS batch buffers exist for a BO.
+ * @bo: the &xe_bo to inspect
+ *
+ * Only a VF driver is expected to set up the CCS batch buffers, yet calling
+ * this helper from a non-VF driver is harmless.
+ *
+ * Return: true when both the read and the write CCS batch buffers are set,
+ * false otherwise.
+ */
+static inline bool xe_bo_has_valid_ccs_bb(struct xe_bo *bo)
+{
+	if (!bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX])
+		return false;
+
+	return bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX] != NULL;
+}
+
static inline bool xe_bo_has_pages(struct xe_bo *bo)
{
if ((bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) ||
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 314652afdca7..d4fe3c8dca5b 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -25,7 +25,9 @@ struct xe_vm;
/* TODO: To be selected with VM_MADVISE */
#define XE_BO_PRIORITY_NORMAL 1
-/** @xe_bo: XE buffer object */
+/**
+ * struct xe_bo - Xe buffer object
+ */
struct xe_bo {
/** @ttm: TTM base buffer object */
struct ttm_buffer_object ttm;
@@ -47,7 +49,7 @@ struct xe_bo {
struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE];
/** @vmap: iosys map of this buffer */
struct iosys_map vmap;
- /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
+ /** @kmap: TTM bo kmap object for internal use only. Keep off. */
struct ttm_bo_kmap_obj kmap;
/** @pinned_link: link to present / evicted list of pinned BO */
struct list_head pinned_link;
@@ -82,10 +84,10 @@ struct xe_bo {
/** @created: Whether the bo has passed initial creation */
bool created;
- /** @ccs_cleared */
+ /** @ccs_cleared: true means that CCS region of BO is already cleared */
bool ccs_cleared;
- /** @bb_ccs_rw: BB instructions of CCS read/write. Valid only for VF */
+ /** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
/**
@@ -99,9 +101,10 @@ struct xe_bo {
struct drm_pagemap_devmem devmem_allocation;
/** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
- struct list_head vram_userfault_link;
+ struct list_head vram_userfault_link;
- /** @min_align: minimum alignment needed for this BO if different
+ /**
+ * @min_align: minimum alignment needed for this BO if different
* from default
*/
u64 min_align;
diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
index 1025d3979b06..8a9b950e7a6d 100644
--- a/drivers/gpu/drm/xe/xe_configfs.c
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -4,6 +4,7 @@
*/
#include <linux/bitops.h>
+#include <linux/ctype.h>
#include <linux/configfs.h>
#include <linux/cleanup.h>
#include <linux/find.h>
@@ -12,6 +13,7 @@
#include <linux/pci.h>
#include <linux/string.h>
+#include "instructions/xe_mi_commands.h"
#include "xe_configfs.h"
#include "xe_hw_engine_types.h"
#include "xe_module.h"
@@ -21,7 +23,7 @@
* DOC: Xe Configfs
*
* Overview
- * =========
+ * ========
*
* Configfs is a filesystem-based manager of kernel objects. XE KMD registers a
* configfs subsystem called ``xe`` that creates a directory in the mounted
@@ -34,7 +36,7 @@
*
* To create a device, the ``xe`` module should already be loaded, but some
* attributes can only be set before binding the device. It can be accomplished
- * by blocking the driver autoprobe:
+ * by blocking the driver autoprobe::
*
* # echo 0 > /sys/bus/pci/drivers_autoprobe
* # modprobe xe
@@ -115,6 +117,45 @@
*
* This attribute can only be set before binding to the device.
*
+ * Context restore BB
+ * ------------------
+ *
+ * Allow to execute a batch buffer during any context switches. When the
+ * GPU is restoring the context, it executes additional commands. It's useful
+ * for testing additional workarounds and validating certain HW behaviors: it's
+ * not intended for normal execution and will taint the kernel with TAINT_TEST
+ * when used.
+ *
+ * Currently this is implemented only for post and mid context restore.
+ * Examples:
+ *
+ * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f100 after the
+ * normal context restore::
+ *
+ * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \
+ * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb
+ *
+ * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f100 at the
+ * beginning of the context restore::
+ *
+ * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \
+ * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_mid_bb
+ *
+ * #. Load certain values in a couple of registers (it can be used as a simpler
+ * alternative to the `cmd` action)::
+ *
+ * # cat > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb <<EOF
+ * rcs reg 4F100 DEADBEEF
+ * rcs reg 4F104 FFFFFFFF
+ * EOF
+ *
+ * .. note::
+ *
+ * When using multiple lines, make sure to use a command that is
+ * implemented with a single write syscall, like HEREDOC.
+ *
+ * These attributes can only be set before binding to the device.
+ *
* Remove devices
* ==============
*
@@ -123,17 +164,27 @@
* # rmdir /sys/kernel/config/xe/0000:03:00.0/
*/
+/* Similar to struct xe_bb, but not tied to HW (yet) */
+struct wa_bb {
+ /* Parsed dwords; left NULL while a store is only counting dwords */
+ u32 *cs;
+ u32 len; /* in dwords */
+};
+
struct xe_config_group_device {
struct config_group group;
struct xe_config_device {
u64 engines_allowed;
+ struct wa_bb ctx_restore_post_bb[XE_ENGINE_CLASS_MAX];
+ struct wa_bb ctx_restore_mid_bb[XE_ENGINE_CLASS_MAX];
bool survivability_mode;
bool enable_psmi;
} config;
/* protects attributes */
struct mutex lock;
+ /* matching descriptor */
+ const struct xe_device_desc *desc;
};
static const struct xe_config_device device_defaults = {
@@ -150,6 +201,7 @@ static void set_device_defaults(struct xe_config_device *config)
struct engine_info {
const char *cls;
u64 mask;
+ enum xe_engine_class engine_class;
};
/* Some helpful macros to aid on the sizing of buffer allocation when parsing */
@@ -157,12 +209,12 @@ struct engine_info {
#define MAX_ENGINE_INSTANCE_CHARS 2
static const struct engine_info engine_info[] = {
- { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK },
- { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK },
- { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK },
- { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK },
- { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK },
- { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK },
+ { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER },
+ { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK, .engine_class = XE_ENGINE_CLASS_COPY },
+ { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_DECODE },
+ { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_ENHANCE },
+ { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK, .engine_class = XE_ENGINE_CLASS_COMPUTE },
+ { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK, .engine_class = XE_ENGINE_CLASS_OTHER },
};
static struct xe_config_group_device *to_xe_config_group_device(struct config_item *item)
@@ -251,7 +303,18 @@ static ssize_t engines_allowed_show(struct config_item *item, char *page)
return p - page;
}
-static bool lookup_engine_mask(const char *pattern, u64 *mask)
+/*
+ * Lookup engine_info. If @mask is not NULL, reduce the mask according to the
+ * instance in @pattern.
+ *
+ * Examples of inputs:
+ * - lookup_engine_info("rcs0", &mask): return "rcs" entry from @engine_info and
+ * mask == BIT_ULL(XE_HW_ENGINE_RCS0)
+ * - lookup_engine_info("rcs*", &mask): return "rcs" entry from @engine_info and
+ * mask == XE_HW_ENGINE_RCS_MASK
+ * - lookup_engine_info("rcs", NULL): return "rcs" entry from @engine_info
+ */
+static const struct engine_info *lookup_engine_info(const char *pattern, u64 *mask)
{
for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) {
u8 instance;
@@ -261,44 +324,62 @@ static bool lookup_engine_mask(const char *pattern, u64 *mask)
continue;
pattern += strlen(engine_info[i].cls);
+ if (!mask && !*pattern)
+ return &engine_info[i];
if (!strcmp(pattern, "*")) {
*mask = engine_info[i].mask;
- return true;
+ return &engine_info[i];
}
if (kstrtou8(pattern, 10, &instance))
- return false;
+ return NULL;
bit = __ffs64(engine_info[i].mask) + instance;
if (bit >= fls64(engine_info[i].mask))
- return false;
+ return NULL;
*mask = BIT_ULL(bit);
- return true;
+ return &engine_info[i];
}
- return false;
+ return NULL;
+}
+
+/*
+ * Parse the engine token at the start of @s.
+ *
+ * The token runs up to the first character found in @end_chars (or the end
+ * of the string) and is looked up with lookup_engine_info().
+ *
+ * @mask: if not NULL, receives the engine mask selected by the token; the
+ *        token may then carry an instance suffix such as "rcs0" or "rcs*".
+ *        If NULL, only a bare engine class name such as "rcs" is accepted.
+ * @pinfo: if not NULL, receives the matching engine_info entry.
+ *
+ * Return: length of the token on success, -EINVAL if the token is too long,
+ * -ENOENT if it does not name a known engine.
+ */
+static int parse_engine(const char *s, const char *end_chars, u64 *mask,
+			const struct engine_info **pinfo)
+{
+	char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1];
+	const struct engine_info *info;
+	size_t len;
+
+	len = strcspn(s, end_chars);
+	if (len >= sizeof(buf))
+		return -EINVAL;
+
+	memcpy(buf, s, len);
+	buf[len] = '\0';
+
+	/*
+	 * lookup_engine_info() stores through @mask as soon as the token has
+	 * an instance suffix ("rcs*", "ccs1", ...). Without a @mask there is
+	 * nowhere to store it, so reject anything but a purely alphabetic
+	 * class name here instead of letting the lookup dereference NULL.
+	 */
+	if (!mask) {
+		for (size_t i = 0; i < len; i++) {
+			if (!isalpha(buf[i]))
+				return -ENOENT;
+		}
+	}
+
+	info = lookup_engine_info(buf, mask);
+	if (!info)
+		return -ENOENT;
+
+	if (pinfo)
+		*pinfo = info;
+
+	return len;
}
static ssize_t engines_allowed_store(struct config_item *item, const char *page,
size_t len)
{
struct xe_config_group_device *dev = to_xe_config_group_device(item);
- size_t patternlen, p;
+ ssize_t patternlen, p;
u64 mask, val = 0;
for (p = 0; p < len; p += patternlen + 1) {
- char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1];
-
- patternlen = strcspn(page + p, ",\n");
- if (patternlen >= sizeof(buf))
- return -EINVAL;
-
- memcpy(buf, page + p, patternlen);
- buf[patternlen] = '\0';
-
- if (!lookup_engine_mask(buf, &mask))
+ patternlen = parse_engine(page + p, ",\n", &mask, NULL);
+ if (patternlen < 0)
return -EINVAL;
val |= mask;
@@ -339,11 +420,250 @@ static ssize_t enable_psmi_store(struct config_item *item, const char *page, siz
return len;
}
+/*
+ * Advance the output cursor *@p by @len bytes.
+ *
+ * When @dereference is false the cursor is only moved. Otherwise @len must
+ * be strictly smaller than *@max_size: on success *@max_size shrinks by @len
+ * and, if @append is not NULL, @len bytes of it are copied to the cursor
+ * before it moves.
+ *
+ * Return: false if the bytes do not fit (nothing is modified), true otherwise.
+ */
+static bool wa_bb_read_advance(bool dereference, char **p,
+			       const char *append, size_t len,
+			       size_t *max_size)
+{
+	if (!dereference) {
+		*p += len;
+		return true;
+	}
+
+	if (*max_size <= len)
+		return false;
+
+	if (append)
+		memcpy(*p, append, len);
+
+	*max_size -= len;
+	*p += len;
+
+	return true;
+}
+
+/*
+ * Format the batch buffers in @wa_bb as text into @data.
+ *
+ * Every engine class with a non-zero length produces one line of the form
+ * "<cls>: xxxxxxxx xxxxxxxx ...\n", with each dword printed as 8 hex digits.
+ * A terminating NUL byte is appended at the end and is included in the
+ * returned count. dev->lock is held for the duration of the call.
+ *
+ * Return: number of bytes produced, or -ENOBUFS if the output does not fit
+ * in @sz bytes.
+ */
+static ssize_t wa_bb_show(struct xe_config_group_device *dev,
+ struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX],
+ char *data, size_t sz)
+{
+ char *p = data;
+
+ guard(mutex)(&dev->lock);
+
+ for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) {
+ enum xe_engine_class ec = engine_info[i].engine_class;
+ size_t len;
+
+ /* Classes without any dword produce no output line */
+ if (!wa_bb[ec].len)
+ continue;
+
+ /*
+ * snprintf() already wrote the text; the helper is called with a NULL
+ * @append only to check the remaining space and move the cursor.
+ * @data doubles as the bool "dereference" argument (non-NULL == true).
+ */
+ len = snprintf(p, sz, "%s:", engine_info[i].cls);
+ if (!wa_bb_read_advance(data, &p, NULL, len, &sz))
+ return -ENOBUFS;
+
+ for (size_t j = 0; j < wa_bb[ec].len; j++) {
+ len = snprintf(p, sz, " %08x", wa_bb[ec].cs[j]);
+ if (!wa_bb_read_advance(data, &p, NULL, len, &sz))
+ return -ENOBUFS;
+ }
+
+ if (!wa_bb_read_advance(data, &p, "\n", 1, &sz))
+ return -ENOBUFS;
+ }
+
+ /* Append the terminating NUL byte (one byte copied from "") */
+ if (!wa_bb_read_advance(data, &p, "", 1, &sz))
+ return -ENOBUFS;
+
+ /* Reserve one more to match check for '\0' */
+ /* NOTE(review): both callers in this file pass a non-NULL page - confirm a NULL @data is ever used */
+ if (!data)
+ p++;
+
+ return p - data;
+}
+
+/* configfs "show" handler: dump the ctx_restore_mid_bb batch buffers into @page */
+static ssize_t ctx_restore_mid_bb_show(struct config_item *item, char *page)
+{
+	struct xe_config_group_device *gdev = to_xe_config_group_device(item);
+	struct wa_bb *bbs = gdev->config.ctx_restore_mid_bb;
+
+	return wa_bb_show(gdev, bbs, page, SZ_4K);
+}
+
+/* configfs "show" handler: dump the ctx_restore_post_bb batch buffers into @page */
+static ssize_t ctx_restore_post_bb_show(struct config_item *item, char *page)
+{
+	struct xe_config_group_device *gdev = to_xe_config_group_device(item);
+	struct wa_bb *bbs = gdev->config.ctx_restore_post_bb;
+
+	return wa_bb_show(gdev, bbs, page, SZ_4K);
+}
+
+/*
+ * Account for one more dword in @wa_bb. The value is stored only when a
+ * backing array is present; with a NULL .cs the call merely counts.
+ */
+static void wa_bb_append(struct wa_bb *wa_bb, u32 val)
+{
+	u32 idx = wa_bb->len++;
+
+	if (wa_bb->cs)
+		wa_bb->cs[idx] = val;
+}
+
+/*
+ * Parse one whitespace-delimited hexadecimal token from @line into @pval.
+ *
+ * Leading spaces and tabs are skipped. The token ends at the next space,
+ * tab, newline or end of string.
+ *
+ * Return: number of bytes consumed from @line (leading whitespace included),
+ * 0 if the line or string ends before any token, -EINVAL if the token is too
+ * long or is not a valid base-16 u32.
+ */
+static ssize_t parse_hex(const char *line, u32 *pval)
+{
+ char numstr[12];
+ const char *p;
+ ssize_t numlen;
+
+ p = line + strspn(line, " \t");
+ /* Nothing left on this line: report "no token" rather than an error */
+ if (!*p || *p == '\n')
+ return 0;
+
+ numlen = strcspn(p, " \t\n");
+ /* Tokens longer than sizeof(numstr) - 2 characters are rejected */
+ if (!numlen || numlen >= sizeof(numstr) - 1)
+ return -EINVAL;
+
+ /* kstrtou32() needs a NUL-terminated string, so work on a copy */
+ memcpy(numstr, p, numlen);
+ numstr[numlen] = '\0';
+ p += numlen;
+
+ if (kstrtou32(numstr, 16, pval))
+ return -EINVAL;
+
+ return p - line;
+}
+
+/*
+ * Parse lines with the format
+ *
+ *	<engine-class> cmd <u32> <u32...>
+ *	<engine-class> reg <u32_addr> <u32_val>
+ *
+ * and optionally save them in @wa_bb[i].cs if it is non-NULL. The length of
+ * each class' @wa_bb[i] is advanced either way, so a first call with NULL
+ * .cs pointers can be used to size the buffers.
+ *
+ * A "reg" line is expanded to three dwords: a MI_LOAD_REGISTER_IMM header
+ * for one register, the address and the value.
+ *
+ * Return: the number of dwords parsed, or a negative error code if a line
+ * is malformed.
+ */
+static ssize_t parse_wa_bb_lines(const char *lines,
+				 struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX])
+{
+	ssize_t dwords = 0, ret;
+	const char *p = lines;
+
+	/*
+	 * The cursor is never incremented blindly: after a line is parsed it
+	 * rests either on whitespace, which the strspn() below consumes, or on
+	 * the terminating NUL, which must not be stepped over (the last line
+	 * is not required to end with a newline).
+	 */
+	for (;;) {
+		const struct engine_info *info = NULL;
+		u32 val, val2;
+
+		/* Also allow empty lines */
+		p += strspn(p, " \t\n");
+		if (!*p)
+			break;
+
+		ret = parse_engine(p, " \t\n", NULL, &info);
+		if (ret < 0)
+			return ret;
+
+		p += ret;
+		p += strspn(p, " \t");
+
+		if (str_has_prefix(p, "cmd")) {
+			/* Raw dwords until the end of the line */
+			for (p += strlen("cmd"); *p;) {
+				ret = parse_hex(p, &val);
+				if (ret < 0)
+					return -EINVAL;
+				if (!ret)
+					break;
+
+				p += ret;
+				dwords++;
+				wa_bb_append(&wa_bb[info->engine_class], val);
+			}
+		} else if (str_has_prefix(p, "reg")) {
+			/* Exactly one address and one value are required */
+			p += strlen("reg");
+			ret = parse_hex(p, &val);
+			if (ret <= 0)
+				return -EINVAL;
+
+			p += ret;
+			ret = parse_hex(p, &val2);
+			if (ret <= 0)
+				return -EINVAL;
+
+			p += ret;
+			dwords += 3;
+			wa_bb_append(&wa_bb[info->engine_class],
+				     MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1));
+			wa_bb_append(&wa_bb[info->engine_class], val);
+			wa_bb_append(&wa_bb[info->engine_class], val2);
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	return dwords;
+}
+
+/*
+ * Replace the batch buffers of one attribute with those described in @page.
+ *
+ * @page is parsed twice: the first pass runs with every .cs pointer NULL and
+ * only counts dwords per engine class; the second pass fills a single array
+ * that is divided between the classes. That array is anchored at
+ * @wa_bb[0].cs, which is the pointer handed to krealloc().
+ *
+ * Return: @len on success, -EBUSY if is_bound() reports the device as bound,
+ * -ENOMEM if the allocation fails, or the parser's negative error code.
+ */
+static ssize_t wa_bb_store(struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX],
+ struct xe_config_group_device *dev,
+ const char *page, size_t len)
+{
+ /* tmp_wa_bb must match wa_bb's size */
+ struct wa_bb tmp_wa_bb[XE_ENGINE_CLASS_MAX] = { };
+ ssize_t count, class;
+ u32 *tmp;
+
+ /* 1. Count dwords - wa_bb[i].cs is NULL for all classes */
+ count = parse_wa_bb_lines(page, tmp_wa_bb);
+ if (count < 0)
+ return count;
+
+ guard(mutex)(&dev->lock);
+
+ if (is_bound(dev))
+ return -EBUSY;
+
+ /*
+ * 2. Allocate a u32 array and set the pointers to the right positions
+ * according to the length of each class' wa_bb
+ */
+ tmp = krealloc(wa_bb[0].cs, count * sizeof(u32), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ /*
+ * Empty input: drop every per-class pointer and length.
+ * NOTE(review): presumably krealloc() with a zero size released the
+ * previous array - confirm.
+ */
+ if (!count) {
+ memset(wa_bb, 0, sizeof(tmp_wa_bb));
+ return len;
+ }
+
+ /*
+ * Lay the classes out back to back in the new array, using the lengths
+ * from pass 1, then reset each length so pass 2 appends from index 0.
+ */
+ for (class = 0, count = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+ tmp_wa_bb[class].cs = tmp + count;
+ count += tmp_wa_bb[class].len;
+ tmp_wa_bb[class].len = 0;
+ }
+
+ /* 3. Parse wa_bb lines again, this time saving the values */
+ count = parse_wa_bb_lines(page, tmp_wa_bb);
+ if (count < 0)
+ return count;
+
+ /* Publish the new pointers and lengths in one go */
+ memcpy(wa_bb, tmp_wa_bb, sizeof(tmp_wa_bb));
+
+ return len;
+}
+
+/* configfs "store" handler: parse @data into the ctx_restore_mid_bb batch buffers */
+static ssize_t ctx_restore_mid_bb_store(struct config_item *item,
+					const char *data, size_t sz)
+{
+	struct xe_config_group_device *gdev = to_xe_config_group_device(item);
+	struct wa_bb *bbs = gdev->config.ctx_restore_mid_bb;
+
+	return wa_bb_store(bbs, gdev, data, sz);
+}
+
+/* configfs "store" handler: parse @data into the ctx_restore_post_bb batch buffers */
+static ssize_t ctx_restore_post_bb_store(struct config_item *item,
+					 const char *data, size_t sz)
+{
+	struct xe_config_group_device *gdev = to_xe_config_group_device(item);
+	struct wa_bb *bbs = gdev->config.ctx_restore_post_bb;
+
+	return wa_bb_store(bbs, gdev, data, sz);
+}
+
+CONFIGFS_ATTR(, ctx_restore_mid_bb);
+CONFIGFS_ATTR(, ctx_restore_post_bb);
CONFIGFS_ATTR(, enable_psmi);
CONFIGFS_ATTR(, engines_allowed);
CONFIGFS_ATTR(, survivability_mode);
static struct configfs_attribute *xe_config_device_attrs[] = {
+ &attr_ctx_restore_mid_bb,
+ &attr_ctx_restore_post_bb,
&attr_enable_psmi,
&attr_engines_allowed,
&attr_survivability_mode,
@@ -355,6 +675,8 @@ static void xe_config_device_release(struct config_item *item)
struct xe_config_group_device *dev = to_xe_config_group_device(item);
mutex_destroy(&dev->lock);
+
+ kfree(dev->config.ctx_restore_post_bb[0].cs);
kfree(dev);
}
@@ -362,8 +684,26 @@ static struct configfs_item_operations xe_config_device_ops = {
.release = xe_config_device_release,
};
+/*
+ * Decide whether @attr is exposed for this device directory. Every
+ * attribute is visible except survivability_mode, which is shown only for
+ * discrete devices whose platform is XE_BATTLEMAGE or later in the enum.
+ */
+static bool xe_config_device_is_visible(struct config_item *item,
+					struct configfs_attribute *attr, int n)
+{
+	struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+	if (attr != &attr_survivability_mode)
+		return true;
+
+	return dev->desc->is_dgfx && dev->desc->platform >= XE_BATTLEMAGE;
+}
+
+static struct configfs_group_operations xe_config_device_group_ops = {
+ .is_visible = xe_config_device_is_visible,
+};
+
static const struct config_item_type xe_config_device_type = {
.ct_item_ops = &xe_config_device_ops,
+ .ct_group_ops = &xe_config_device_group_ops,
.ct_attrs = xe_config_device_attrs,
.ct_owner = THIS_MODULE,
};
@@ -442,6 +782,7 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro
if (!dev)
return ERR_PTR(-ENOMEM);
+ dev->desc = match;
set_device_defaults(&dev->config);
config_group_init_type_name(&dev->group, name, &xe_config_device_type);
@@ -451,12 +792,12 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro
return &dev->group;
}
-static struct configfs_group_operations xe_config_device_group_ops = {
+static struct configfs_group_operations xe_config_group_ops = {
.make_group = xe_config_make_device_group,
};
static const struct config_item_type xe_configfs_type = {
- .ct_group_ops = &xe_config_device_group_ops,
+ .ct_group_ops = &xe_config_group_ops,
.ct_owner = THIS_MODULE,
};
@@ -543,23 +884,6 @@ bool xe_configfs_get_survivability_mode(struct pci_dev *pdev)
}
/**
- * xe_configfs_clear_survivability_mode - clear configfs survivability mode
- * @pdev: pci device
- */
-void xe_configfs_clear_survivability_mode(struct pci_dev *pdev)
-{
- struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
-
- if (!dev)
- return;
-
- guard(mutex)(&dev->lock);
- dev->config.survivability_mode = 0;
-
- config_group_put(&dev->group);
-}
-
-/**
* xe_configfs_get_engines_allowed - get engine allowed mask from configfs
* @pdev: pci device
*
@@ -594,11 +918,63 @@ bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev)
return false;
ret = dev->config.enable_psmi;
- config_item_put(&dev->group.cg_item);
+ config_group_put(&dev->group);
return ret;
}
+/**
+ * xe_configfs_get_ctx_restore_mid_bb - get configfs ctx_restore_mid_bb setting
+ * @pdev: pci device
+ * @class: hw engine class
+ * @cs: optional location that receives the pointer to the bb to use - the
+ *      pointer is only valid during probe
+ *
+ * Return: Number of dwords used in the mid_ctx_restore setting in configfs,
+ * 0 if there is no configfs entry for @pdev
+ */
+u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev,
+				       enum xe_engine_class class,
+				       const u32 **cs)
+{
+	struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+	const struct wa_bb *bb;
+	u32 len;
+
+	if (!dev)
+		return 0;
+
+	bb = &dev->config.ctx_restore_mid_bb[class];
+	if (cs)
+		*cs = bb->cs;
+	len = bb->len;
+
+	/* Drop the reference taken by find_xe_config_group_device() */
+	config_group_put(&dev->group);
+
+	return len;
+}
+
+/**
+ * xe_configfs_get_ctx_restore_post_bb - get configfs ctx_restore_post_bb setting
+ * @pdev: pci device
+ * @class: hw engine class
+ * @cs: optional location that receives the pointer to the bb to use - the
+ *      pointer is only valid during probe
+ *
+ * Return: Number of dwords used in the post_ctx_restore setting in configfs,
+ * 0 if there is no configfs entry for @pdev
+ */
+u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev,
+					enum xe_engine_class class,
+					const u32 **cs)
+{
+	struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+	u32 len;
+
+	if (!dev)
+		return 0;
+
+	/*
+	 * @cs may be NULL when the caller only wants the length, exactly as
+	 * xe_configfs_get_ctx_restore_mid_bb() allows.
+	 */
+	if (cs)
+		*cs = dev->config.ctx_restore_post_bb[class].cs;
+
+	len = dev->config.ctx_restore_post_bb[class].len;
+	config_group_put(&dev->group);
+
+	return len;
+}
+
int __init xe_configfs_init(void)
{
int ret;
@@ -614,7 +990,7 @@ int __init xe_configfs_init(void)
return 0;
}
-void __exit xe_configfs_exit(void)
+void xe_configfs_exit(void)
{
configfs_unregister_subsystem(&xe_configfs);
mutex_destroy(&xe_configfs.su_mutex);
diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h
index 58c8c3164000..c61e0e47ed94 100644
--- a/drivers/gpu/drm/xe/xe_configfs.h
+++ b/drivers/gpu/drm/xe/xe_configfs.h
@@ -8,6 +8,8 @@
#include <linux/limits.h>
#include <linux/types.h>
+#include <xe_hw_engine_types.h>
+
struct pci_dev;
#if IS_ENABLED(CONFIG_CONFIGFS_FS)
@@ -15,17 +17,23 @@ int xe_configfs_init(void);
void xe_configfs_exit(void);
void xe_configfs_check_device(struct pci_dev *pdev);
bool xe_configfs_get_survivability_mode(struct pci_dev *pdev);
-void xe_configfs_clear_survivability_mode(struct pci_dev *pdev);
u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev);
bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev);
+u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class,
+ const u32 **cs);
+u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class,
+ const u32 **cs);
#else
static inline int xe_configfs_init(void) { return 0; }
static inline void xe_configfs_exit(void) { }
static inline void xe_configfs_check_device(struct pci_dev *pdev) { }
static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; }
-static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { }
static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; }
static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; }
+static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class,
+ const u32 **cs) { return 0; }
+static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class,
+ const u32 **cs) { return 0; }
#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 8d6df6bd885e..cd977dbd1ef6 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -24,7 +24,9 @@
#include "xe_pxp_debugfs.h"
#include "xe_sriov.h"
#include "xe_sriov_pf.h"
+#include "xe_sriov_vf.h"
#include "xe_step.h"
+#include "xe_tile_debugfs.h"
#include "xe_wa.h"
#include "xe_vsec.h"
@@ -38,7 +40,7 @@ DECLARE_FAULT_ATTR(gt_reset_failure);
DECLARE_FAULT_ATTR(inject_csc_hw_error);
static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio,
- u32 offset, char *name, struct drm_printer *p)
+ u32 offset, const char *name, struct drm_printer *p)
{
u64 residency = 0;
int ret;
@@ -134,9 +136,9 @@ static int dgfx_pkg_residencies_show(struct seq_file *m, void *data)
p = drm_seq_file_printer(m);
xe_pm_runtime_get(xe);
mmio = xe_root_tile_mmio(xe);
- struct {
+ static const struct {
u32 offset;
- char *name;
+ const char *name;
} residencies[] = {
{BMG_G2_RESIDENCY_OFFSET, "Package G2"},
{BMG_G6_RESIDENCY_OFFSET, "Package G6"},
@@ -163,9 +165,9 @@ static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data)
xe_pm_runtime_get(xe);
mmio = xe_root_tile_mmio(xe);
- struct {
+ static const struct {
u32 offset;
- char *name;
+ const char *name;
} residencies[] = {
{BMG_PCIE_LINK_L0_RESIDENCY_OFFSET, "PCIE LINK L0 RESIDENCY"},
{BMG_PCIE_LINK_L1_RESIDENCY_OFFSET, "PCIE LINK L1 RESIDENCY"},
@@ -329,23 +331,44 @@ static const struct file_operations atomic_svm_timeslice_ms_fops = {
.write = atomic_svm_timeslice_ms_set,
};
-static void create_tile_debugfs(struct xe_tile *tile, struct dentry *root)
+static ssize_t disable_late_binding_show(struct file *f, char __user *ubuf,
+ size_t size, loff_t *pos)
{
- char name[8];
+ struct xe_device *xe = file_inode(f)->i_private;
+ struct xe_late_bind *late_bind = &xe->late_bind;
+ char buf[32];
+ int len;
- snprintf(name, sizeof(name), "tile%u", tile->id);
- tile->debugfs = debugfs_create_dir(name, root);
- if (IS_ERR(tile->debugfs))
- return;
+ len = scnprintf(buf, sizeof(buf), "%d\n", late_bind->disable);
+
+ return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t disable_late_binding_set(struct file *f, const char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ struct xe_late_bind *late_bind = &xe->late_bind;
+ u32 uval;
+ ssize_t ret;
- /*
- * Store the xe_tile pointer as private data of the tile/ directory
- * node so other tile specific attributes under that directory may
- * refer to it by looking at its parent node private data.
- */
- tile->debugfs->d_inode->i_private = tile;
+ ret = kstrtouint_from_user(ubuf, size, sizeof(uval), &uval);
+ if (ret)
+ return ret;
+
+ if (uval > 1)
+ return -EINVAL;
+
+ late_bind->disable = !!uval;
+ return size;
}
+static const struct file_operations disable_late_binding_fops = {
+ .owner = THIS_MODULE,
+ .read = disable_late_binding_show,
+ .write = disable_late_binding_set,
+};
+
void xe_debugfs_register(struct xe_device *xe)
{
struct ttm_device *bdev = &xe->ttm;
@@ -362,7 +385,7 @@ void xe_debugfs_register(struct xe_device *xe)
ARRAY_SIZE(debugfs_list),
root, minor);
- if (xe->info.platform == XE_BATTLEMAGE) {
+ if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) {
drm_debugfs_create_files(debugfs_residencies,
ARRAY_SIZE(debugfs_residencies),
root, minor);
@@ -379,6 +402,9 @@ void xe_debugfs_register(struct xe_device *xe)
debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe,
&atomic_svm_timeslice_ms_fops);
+ debugfs_create_file("disable_late_binding", 0600, root, xe,
+ &disable_late_binding_fops);
+
for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
man = ttm_manager_type(bdev, mem_type);
@@ -398,7 +424,7 @@ void xe_debugfs_register(struct xe_device *xe)
ttm_resource_manager_create_debugfs(man, root, "stolen_mm");
for_each_tile(tile, xe, tile_id)
- create_tile_debugfs(tile, root);
+ xe_tile_debugfs_register(tile);
for_each_gt(gt, xe, id)
xe_gt_debugfs_register(gt);
@@ -411,4 +437,6 @@ void xe_debugfs_register(struct xe_device *xe)
if (IS_SRIOV_PF(xe))
xe_sriov_pf_debugfs_register(xe, root);
+ else if (IS_SRIOV_VF(xe))
+ xe_sriov_vf_debugfs_register(xe, root);
}
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 9e4773a17ef8..fdb7b7498920 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -45,6 +45,7 @@
#include "xe_hwmon.h"
#include "xe_i2c.h"
#include "xe_irq.h"
+#include "xe_late_bind_fw.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_nvm.h"
@@ -457,6 +458,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
if (err)
goto err;
+ xe_validation_device_init(&xe->val);
+
init_waitqueue_head(&xe->ufence_wq);
init_rwsem(&xe->usm.lock);
@@ -530,7 +533,7 @@ static bool xe_driver_flr_disabled(struct xe_device *xe)
* re-init and saving/restoring (or re-populating) the wiped memory. Since we
* perform the FLR as the very last action before releasing access to the HW
* during the driver release flow, we don't attempt recovery at all, because
- * if/when a new instance of i915 is bound to the device it will do a full
+ * if/when a new instance of Xe is bound to the device it will do a full
* re-init anyway.
*/
static void __xe_driver_flr(struct xe_device *xe)
@@ -901,6 +904,10 @@ int xe_device_probe(struct xe_device *xe)
if (err)
return err;
+ err = xe_late_bind_init(&xe->late_bind);
+ if (err)
+ return err;
+
err = xe_oa_init(xe);
if (err)
return err;
@@ -950,7 +957,7 @@ int xe_device_probe(struct xe_device *xe)
xe_vsec_init(xe);
- err = xe_sriov_late_init(xe);
+ err = xe_sriov_init_late(xe);
if (err)
goto err_unregister_display;
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
index b7f8fcfed8d8..c5151c86a98a 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -71,6 +71,15 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR_RW(vram_d3cold_threshold);
+static struct attribute *vram_attrs[] = {
+ &dev_attr_vram_d3cold_threshold.attr,
+ NULL
+};
+
+static const struct attribute_group vram_attr_group = {
+ .attrs = vram_attrs,
+};
+
static ssize_t
lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf)
{
@@ -149,41 +158,16 @@ out:
}
static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version);
-static int late_bind_create_files(struct device *dev)
-{
- struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
- struct xe_tile *root = xe_device_get_root_tile(xe);
- u32 cap = 0;
- int ret;
-
- xe_pm_runtime_get(xe);
-
- ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0),
- &cap, NULL);
- if (ret) {
- if (ret == -ENXIO) {
- drm_dbg(&xe->drm, "Late binding not supported by firmware\n");
- ret = 0;
- }
- goto out;
- }
-
- if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) {
- ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr);
- if (ret)
- goto out;
- }
-
- if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap))
- ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr);
-out:
- xe_pm_runtime_put(xe);
-
- return ret;
-}
+static struct attribute *late_bind_attrs[] = {
+ &dev_attr_lb_fan_control_version.attr,
+ &dev_attr_lb_voltage_regulator_version.attr,
+ NULL
+};
-static void late_bind_remove_files(struct device *dev)
+static umode_t late_bind_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
{
+ struct device *dev = kobj_to_dev(kobj);
struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
struct xe_tile *root = xe_device_get_root_tile(xe);
u32 cap = 0;
@@ -193,18 +177,25 @@ static void late_bind_remove_files(struct device *dev)
ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0),
&cap, NULL);
+ xe_pm_runtime_put(xe);
if (ret)
- goto out;
+ return 0;
- if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap))
- sysfs_remove_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr);
+ if (attr == &dev_attr_lb_fan_control_version.attr &&
+ REG_FIELD_GET(V1_FAN_SUPPORTED, cap))
+ return attr->mode;
+ if (attr == &dev_attr_lb_voltage_regulator_version.attr &&
+ REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap))
+ return attr->mode;
- if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap))
- sysfs_remove_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr);
-out:
- xe_pm_runtime_put(xe);
+ return 0;
}
+static const struct attribute_group late_bind_attr_group = {
+ .attrs = late_bind_attrs,
+ .is_visible = late_bind_attr_is_visible,
+};
+
/**
* DOC: PCIe Gen5 Limitations
*
@@ -278,24 +269,15 @@ auto_link_downgrade_status_show(struct device *dev, struct device_attribute *att
}
static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status);
-static const struct attribute *auto_link_downgrade_attrs[] = {
+static struct attribute *auto_link_downgrade_attrs[] = {
&dev_attr_auto_link_downgrade_capable.attr,
&dev_attr_auto_link_downgrade_status.attr,
NULL
};
-static void xe_device_sysfs_fini(void *arg)
-{
- struct xe_device *xe = arg;
-
- if (xe->d3cold.capable)
- sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
-
- if (xe->info.platform == XE_BATTLEMAGE) {
- sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs);
- late_bind_remove_files(xe->drm.dev);
- }
-}
+static const struct attribute_group auto_link_downgrade_attr_group = {
+ .attrs = auto_link_downgrade_attrs,
+};
int xe_device_sysfs_init(struct xe_device *xe)
{
@@ -303,24 +285,20 @@ int xe_device_sysfs_init(struct xe_device *xe)
int ret;
if (xe->d3cold.capable) {
- ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
+ ret = devm_device_add_group(dev, &vram_attr_group);
if (ret)
return ret;
}
- if (xe->info.platform == XE_BATTLEMAGE) {
- ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs);
+ if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) {
+ ret = devm_device_add_group(dev, &auto_link_downgrade_attr_group);
if (ret)
- goto cleanup;
+ return ret;
- ret = late_bind_create_files(dev);
+ ret = devm_device_add_group(dev, &late_bind_attr_group);
if (ret)
- goto cleanup;
+ return ret;
}
- return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe);
-
-cleanup:
- xe_device_sysfs_fini(xe);
- return ret;
+ return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index e7e6eecf19ef..a6c361db11d9 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -14,6 +14,7 @@
#include "xe_devcoredump_types.h"
#include "xe_heci_gsc.h"
+#include "xe_late_bind_fw_types.h"
#include "xe_lmtt_types.h"
#include "xe_memirq_types.h"
#include "xe_oa_types.h"
@@ -26,6 +27,7 @@
#include "xe_sriov_vf_ccs_types.h"
#include "xe_step_types.h"
#include "xe_survivability_mode_types.h"
+#include "xe_validation.h"
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
#define TEST_VM_OPS_ERROR
@@ -183,9 +185,6 @@ struct xe_tile {
struct {
/** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
struct xe_ggtt_node *ggtt_balloon[2];
-
- /** @sriov.vf.ccs: CCS read and write contexts for VF. */
- struct xe_tile_vf_ccs ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
} vf;
} sriov;
@@ -282,6 +281,8 @@ struct xe_device {
u8 has_heci_cscfi:1;
/** @info.has_heci_gscfi: device has heci gscfi */
u8 has_heci_gscfi:1;
+ /** @info.has_late_bind: Device has firmware late binding support */
+ u8 has_late_bind:1;
/** @info.has_llc: Device has a shared CPU+GPU last level cache */
u8 has_llc:1;
/** @info.has_mbx_power_limits: Device has support to manage power limits using
@@ -535,6 +536,9 @@ struct xe_device {
/** @nvm: discrete graphics non-volatile memory */
struct intel_dg_nvm_dev *nvm;
+ /** @late_bind: xe mei late bind interface */
+ struct xe_late_bind late_bind;
+
/** @oa: oa observation subsystem */
struct xe_oa oa;
@@ -586,6 +590,8 @@ struct xe_device {
*/
atomic64_t global_total_pages;
#endif
+ /** @val: The domain for exhaustive eviction, which is currently per device. */
+ struct xe_validation_device val;
/** @psmi: GPU debugging via additional validation HW */
struct {
@@ -595,6 +601,13 @@ struct xe_device {
u8 region_mask;
} psmi;
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+ /** @g2g_test_array: for testing G2G communications */
+ u32 *g2g_test_array;
+ /** @g2g_test_count: for testing G2G communications */
+ atomic_t g2g_test_count;
+#endif
+
/* private: */
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 95d06bd65b0f..a7d67725c3ee 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -51,6 +51,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
struct drm_gem_object *obj = attach->dmabuf->priv;
struct xe_bo *bo = gem_to_xe_bo(obj);
struct xe_device *xe = xe_bo_device(bo);
+ struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED;
int ret;
/*
@@ -63,7 +64,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
return -EINVAL;
}
- ret = xe_bo_migrate(bo, XE_PL_TT);
+ ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
if (ret) {
if (ret != -EINTR && ret != -ERESTARTSYS)
drm_dbg(&xe->drm,
@@ -72,7 +73,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
return ret;
}
- ret = xe_bo_pin_external(bo, true);
+ ret = xe_bo_pin_external(bo, true, exec);
xe_assert(xe, !ret);
return 0;
@@ -92,6 +93,7 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct xe_bo *bo = gem_to_xe_bo(obj);
+ struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED;
struct sg_table *sgt;
int r = 0;
@@ -100,9 +102,9 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
if (!xe_bo_is_pinned(bo)) {
if (!attach->peer2peer)
- r = xe_bo_migrate(bo, XE_PL_TT);
+ r = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
else
- r = xe_bo_validate(bo, NULL, false);
+ r = xe_bo_validate(bo, NULL, false, exec);
if (r)
return ERR_PTR(r);
}
@@ -161,15 +163,26 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
struct xe_bo *bo = gem_to_xe_bo(obj);
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ int ret = 0;
if (!reads)
return 0;
/* Can we do interruptible lock here? */
- xe_bo_lock(bo, false);
- (void)xe_bo_migrate(bo, XE_PL_TT);
- xe_bo_unlock(bo);
+ xe_validation_guard(&ctx, &xe_bo_device(bo)->val, &exec, (struct xe_val_flags) {}, ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ break;
+
+ ret = xe_bo_migrate(bo, XE_PL_TT, NULL, &exec);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ }
+ /* If we failed, cpu-access takes place in current placement. */
return 0;
}
@@ -220,32 +233,45 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
{
struct dma_resv *resv = dma_buf->resv;
struct xe_device *xe = to_xe_device(dev);
+ struct xe_validation_ctx ctx;
+ struct drm_gem_object *dummy_obj;
+ struct drm_exec exec;
struct xe_bo *bo;
- int ret;
-
- dma_resv_lock(resv, NULL);
- bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
- 0, /* Will require 1way or 2way for vm_bind */
- ttm_bo_type_sg, XE_BO_FLAG_SYSTEM);
- if (IS_ERR(bo)) {
- ret = PTR_ERR(bo);
- goto error;
+ int ret = 0;
+
+ dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
+ if (!dummy_obj)
+ return ERR_PTR(-ENOMEM);
+
+ dummy_obj->resv = resv;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) {
+ ret = drm_exec_lock_obj(&exec, dummy_obj);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ break;
+
+ bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
+ 0, /* Will require 1way or 2way for vm_bind */
+ ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ break;
+ }
}
- dma_resv_unlock(resv);
-
- return &bo->ttm.base;
+ drm_gem_object_put(dummy_obj);
-error:
- dma_resv_unlock(resv);
- return ERR_PTR(ret);
+ return ret ? ERR_PTR(ret) : &bo->ttm.base;
}
static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = attach->importer_priv;
struct xe_bo *bo = gem_to_xe_bo(obj);
+ struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED;
- XE_WARN_ON(xe_bo_evict(bo));
+ XE_WARN_ON(xe_bo_evict(bo, exec));
}
static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = {
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index fdd514fec5ef..f5cfdf29fde3 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -617,9 +617,8 @@ static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
size = stream->per_xecore_buf_size * last_xecore;
- bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
- size, ~0ull, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
+ bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false);
if (IS_ERR(bo)) {
kfree(stream->xecore_buf);
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 374c831e691b..7715e74bb945 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -19,6 +19,7 @@
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
+#include "xe_svm.h"
#include "xe_vm.h"
/**
@@ -97,9 +98,13 @@
static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
{
struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
+ int ret;
/* The fence slot added here is intended for the exec sched job. */
- return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
+ xe_vm_set_validation_exec(vm, &vm_exec->exec);
+ ret = xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
+ xe_vm_set_validation_exec(vm, NULL);
+ return ret;
}
int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -115,10 +120,10 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
struct drm_exec *exec = &vm_exec.exec;
u32 i, num_syncs, num_ufence = 0;
+ struct xe_validation_ctx ctx;
struct xe_sched_job *job;
struct xe_vm *vm;
bool write_locked, skip_retry = false;
- ktime_t end = 0;
int err = 0;
struct xe_hw_engine_group *group;
enum xe_hw_engine_group_execution_mode mode, previous_mode;
@@ -246,17 +251,12 @@ retry:
if (err)
goto err_unlock_list;
- vm_exec.vm = &vm->gpuvm;
- vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
- if (xe_vm_in_lr_mode(vm)) {
- drm_exec_init(exec, vm_exec.flags, 0);
- } else {
- err = drm_gpuvm_exec_lock(&vm_exec);
- if (err) {
- if (xe_vm_validate_should_retry(exec, err, &end))
- err = -EAGAIN;
+ if (!xe_vm_in_lr_mode(vm)) {
+ vm_exec.vm = &vm->gpuvm;
+ vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
+ err = xe_validation_exec_lock(&ctx, &vm_exec, &xe->val);
+ if (err)
goto err_unlock_list;
- }
}
if (xe_vm_is_closed_or_banned(q->vm)) {
@@ -303,7 +303,7 @@ retry:
if (err)
goto err_put_job;
- err = down_read_interruptible(&vm->userptr.notifier_lock);
+ err = xe_svm_notifier_lock_interruptible(vm);
if (err)
goto err_put_job;
@@ -345,12 +345,13 @@ retry:
err_repin:
if (!xe_vm_in_lr_mode(vm))
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
err_put_job:
if (err)
xe_sched_job_put(job);
err_exec:
- drm_exec_fini(exec);
+ if (!xe_vm_in_lr_mode(vm))
+ xe_validation_ctx_fini(&ctx);
err_unlock_list:
up_read(&vm->lock);
if (err == -EAGAIN && !skip_retry)
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 71c7690a92b3..7fdd0a97a628 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -28,6 +28,7 @@
#include "xe_pm.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
+#include "xe_tile_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_tlb_inval.h"
#include "xe_wa.h"
@@ -269,7 +270,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
gsm_size = probe_gsm_size(pdev);
if (gsm_size == 0) {
- drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
+ xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n");
return -ENOMEM;
}
@@ -466,8 +467,8 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt,
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf));
- xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n",
- node->start, node->start + node->size, buf, description);
+ xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n",
+ node->start, node->start + node->size, buf, description);
}
}
@@ -499,9 +500,8 @@ int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64
err = drm_mm_reserve_node(&ggtt->mm, &node->base);
- if (xe_gt_WARN(ggtt->tile->primary_gt, err,
- "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
- node->base.start, node->base.start + node->base.size, ERR_PTR(err)))
+ if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
+ node->base.start, node->base.start + node->base.size, ERR_PTR(err)))
return err;
xe_ggtt_dump_node(ggtt, &node->base, "balloon");
@@ -731,7 +731,7 @@ void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo)
}
static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
- u64 start, u64 end)
+ u64 start, u64 end, struct drm_exec *exec)
{
u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
u8 tile_id = ggtt->tile->id;
@@ -746,7 +746,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
return 0;
}
- err = xe_bo_validate(bo, NULL, false);
+ err = xe_bo_validate(bo, NULL, false, exec);
if (err)
return err;
@@ -788,25 +788,28 @@ out:
* @bo: the &xe_bo to be inserted
* @start: address where it will be inserted
* @end: end of the range where it will be inserted
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Return: 0 on success or a negative error code on failure.
*/
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
- u64 start, u64 end)
+ u64 start, u64 end, struct drm_exec *exec)
{
- return __xe_ggtt_insert_bo_at(ggtt, bo, start, end);
+ return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec);
}
/**
* xe_ggtt_insert_bo - Insert BO into GGTT
* @ggtt: the &xe_ggtt where bo will be inserted
* @bo: the &xe_bo to be inserted
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo,
+ struct drm_exec *exec)
{
- return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
+ return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index fbe1e397d05d..75fc7a1efea7 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -10,6 +10,7 @@
struct drm_printer;
struct xe_tile;
+struct drm_exec;
struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile);
int xe_ggtt_init_early(struct xe_ggtt *ggtt);
@@ -31,9 +32,9 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node);
void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
struct xe_bo *bo, u16 pat_index);
void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo);
-int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, struct drm_exec *exec);
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
- u64 start, u64 end);
+ u64 start, u64 end, struct drm_exec *exec);
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare);
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index f5ae28af60d4..83d61bf8ec62 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -136,10 +136,10 @@ static int query_compatibility_version(struct xe_gsc *gsc)
u64 ggtt_offset;
int err;
- bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(xe, tile, GSC_VER_PKT_SZ * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT, false);
if (IS_ERR(bo)) {
xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index bf3a67b5951c..f253e2df4907 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -31,6 +31,7 @@
#include "xe_reg_whitelist.h"
#include "xe_sa.h"
#include "xe_sriov.h"
+#include "xe_sriov_vf_ccs.h"
#include "xe_tuning.h"
#include "xe_uc_debugfs.h"
#include "xe_wa.h"
@@ -123,45 +124,6 @@ static int powergate_info(struct xe_gt *gt, struct drm_printer *p)
return ret;
}
-static int sa_info(struct xe_gt *gt, struct drm_printer *p)
-{
- struct xe_tile *tile = gt_to_tile(gt);
-
- xe_pm_runtime_get(gt_to_xe(gt));
- drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p,
- xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool));
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
-static int sa_info_vf_ccs(struct xe_gt *gt, struct drm_printer *p)
-{
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_sa_manager *bb_pool;
- enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
-
- if (!IS_VF_CCS_READY(gt_to_xe(gt)))
- return 0;
-
- xe_pm_runtime_get(gt_to_xe(gt));
-
- for_each_ccs_rw_ctx(ctx_id) {
- bb_pool = tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool;
- if (!bb_pool)
- break;
-
- drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
- drm_printf(p, "-------------------------\n");
- drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
- drm_puts(p, "\n");
- }
-
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
static int topology(struct xe_gt *gt, struct drm_printer *p)
{
xe_pm_runtime_get(gt_to_xe(gt));
@@ -316,7 +278,6 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p)
* - without access to the PF specific data
*/
static const struct drm_info_list vf_safe_debugfs_list[] = {
- {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info},
{"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
{"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
{"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
@@ -327,17 +288,9 @@ static const struct drm_info_list vf_safe_debugfs_list[] = {
{"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc},
{"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc},
{"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc},
- {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info},
{"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig},
};
-/*
- * only for GT debugfs files which are valid on VF. Not valid on PF.
- */
-static const struct drm_info_list vf_only_debugfs_list[] = {
- {"sa_info_vf_ccs", .show = xe_gt_debugfs_simple_show, .data = sa_info_vf_ccs},
-};
-
/* everything else should be added here */
static const struct drm_info_list pf_only_debugfs_list[] = {
{"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
@@ -363,6 +316,24 @@ static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t
return count;
}
+static ssize_t stats_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *s = file->private_data;
+ struct xe_gt *gt = s->private;
+
+ return write_to_gt_call(userbuf, count, ppos, xe_gt_stats_clear, gt);
+}
+
+static int stats_show(struct seq_file *s, void *unused)
+{
+ struct drm_printer p = drm_seq_file_printer(s);
+ struct xe_gt *gt = s->private;
+
+ return xe_gt_stats_print_info(gt, &p);
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(stats);
+
static void force_reset(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
@@ -448,6 +419,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
root->d_inode->i_private = gt;
/* VF safe */
+ debugfs_create_file("stats", 0600, root, gt, &stats_fops);
debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops);
debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops);
@@ -459,11 +431,6 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
drm_debugfs_create_files(pf_only_debugfs_list,
ARRAY_SIZE(pf_only_debugfs_list),
root, minor);
- else
- drm_debugfs_create_files(vf_only_debugfs_list,
- ARRAY_SIZE(vf_only_debugfs_list),
- root, minor);
-
xe_uc_debugfs_register(&gt->uc, root);
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 60d9354e7dbf..4ff1b6b58d6b 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -227,6 +227,33 @@ static ssize_t max_freq_store(struct kobject *kobj,
}
static struct kobj_attribute attr_max_freq = __ATTR_RW(max_freq);
+static ssize_t power_profile_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+
+ xe_guc_pc_get_power_profile(dev_to_pc(dev), buff);
+
+ return strlen(buff);
+}
+
+static ssize_t power_profile_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buff, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+ int err;
+
+ xe_pm_runtime_get(dev_to_xe(dev));
+ err = xe_guc_pc_set_power_profile(pc, buff);
+ xe_pm_runtime_put(dev_to_xe(dev));
+
+ return err ?: count;
+}
+static struct kobj_attribute attr_power_profile = __ATTR_RW(power_profile);
+
static const struct attribute *freq_attrs[] = {
&attr_act_freq.attr,
&attr_cur_freq.attr,
@@ -236,6 +263,7 @@ static const struct attribute *freq_attrs[] = {
&attr_rpn_freq.attr,
&attr_min_freq.attr,
&attr_max_freq.attr,
+ &attr_power_profile.attr,
NULL
};
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 683ac021a06d..8fb1cae91724 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -362,7 +362,7 @@ fallback:
* @group: pointer to storage for steering group ID
* @instance: pointer to storage for steering instance ID
*/
-void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance)
+void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance)
{
xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index bc06520befab..283a1c9770e2 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -31,7 +31,8 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
u8 *group, u8 *instance);
void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
-void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
+void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt,
+ unsigned int dss, u16 *group, u16 *instance);
u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance);
/*
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index d02d22fb3659..a054d6010ae0 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -87,10 +87,8 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
if (!bo)
return 0;
- err = need_vram_move ? xe_bo_migrate(bo, vram->placement) :
- xe_bo_validate(bo, vm, true);
-
- return err;
+ return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) :
+ xe_bo_validate(bo, vm, true, exec);
}
static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma,
@@ -98,9 +96,9 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma,
{
struct xe_vm *vm = xe_vma_vm(vma);
struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
struct dma_fence *fence;
- ktime_t end = 0;
int err, needs_vram;
lockdep_assert_held_write(&vm->lock);
@@ -129,22 +127,22 @@ retry_userptr:
}
/* Lock VM and BOs dma-resv */
- drm_exec_init(&exec, 0, 0);
+ xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {});
drm_exec_until_all_locked(&exec) {
err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram);
drm_exec_retry_on_contention(&exec);
- if (xe_vm_validate_should_retry(&exec, err, &end))
- err = -EAGAIN;
+ xe_validation_retry_on_oom(&ctx, &err);
if (err)
goto unlock_dma_resv;
/* Bind VMA only to the GT that has faulted */
trace_xe_vma_pf_bind(vma);
+ xe_vm_set_validation_exec(vm, &exec);
fence = xe_vma_rebind(vm, vma, BIT(tile->id));
+ xe_vm_set_validation_exec(vm, NULL);
if (IS_ERR(fence)) {
err = PTR_ERR(fence);
- if (xe_vm_validate_should_retry(&exec, err, &end))
- err = -EAGAIN;
+ xe_validation_retry_on_oom(&ctx, &err);
goto unlock_dma_resv;
}
}
@@ -153,7 +151,7 @@ retry_userptr:
dma_fence_put(fence);
unlock_dma_resv:
- drm_exec_fini(&exec);
+ xe_validation_ctx_fini(&ctx);
if (err == -EAGAIN)
goto retry_userptr;
@@ -535,6 +533,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
{
struct xe_device *xe = gt_to_xe(gt);
struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
struct xe_vm *vm;
struct xe_vma *vma;
@@ -564,15 +563,14 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
goto unlock_vm;
/* Lock VM and BOs dma-resv */
- drm_exec_init(&exec, 0, 0);
+ xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {});
drm_exec_until_all_locked(&exec) {
ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram);
drm_exec_retry_on_contention(&exec);
- if (ret)
- break;
+ xe_validation_retry_on_oom(&ctx, &ret);
}
- drm_exec_fini(&exec);
+ xe_validation_ctx_fini(&ctx);
unlock_vm:
up_read(&vm->lock);
xe_vm_put(vm);
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index 11da0228cea7..1313d32862db 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -6,18 +6,22 @@
#ifndef _XE_GT_PRINTK_H_
#define _XE_GT_PRINTK_H_
-#include <drm/drm_print.h>
-
#include "xe_gt_types.h"
+#include "xe_tile_printk.h"
+
+#define __XE_GT_PRINTK_FMT(_gt, _fmt, _args...) "GT%u: " _fmt, (_gt)->info.id, ##_args
#define xe_gt_printk(_gt, _level, _fmt, ...) \
- drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+ xe_tile_printk((_gt)->tile, _level, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__))
+
+#define xe_gt_err(_gt, _fmt, ...) \
+ xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
#define xe_gt_err_once(_gt, _fmt, ...) \
xe_gt_printk((_gt), err_once, _fmt, ##__VA_ARGS__)
-#define xe_gt_err(_gt, _fmt, ...) \
- xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
+#define xe_gt_err_ratelimited(_gt, _fmt, ...) \
+ xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
#define xe_gt_warn(_gt, _fmt, ...) \
xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__)
@@ -31,20 +35,20 @@
#define xe_gt_dbg(_gt, _fmt, ...) \
xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__)
-#define xe_gt_err_ratelimited(_gt, _fmt, ...) \
- xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
+#define xe_gt_WARN_type(_gt, _type, _condition, _fmt, ...) \
+ xe_tile_WARN##_type((_gt)->tile, _condition, _fmt, ## __VA_ARGS__)
#define xe_gt_WARN(_gt, _condition, _fmt, ...) \
- drm_WARN(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+ xe_gt_WARN_type((_gt),, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__))
#define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \
- drm_WARN_ONCE(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+ xe_gt_WARN_type((_gt), _ONCE, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__))
#define xe_gt_WARN_ON(_gt, _condition) \
- xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition))
+ xe_gt_WARN((_gt), _condition, "%s(%s)", "WARN_ON", __stringify(_condition))
#define xe_gt_WARN_ON_ONCE(_gt, _condition) \
- xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition))
+ xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition))
static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf)
{
@@ -67,12 +71,12 @@ static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format *
/*
* The original xe_gt_dbg() callsite annotations are useless here,
- * redirect to the tweaked drm_dbg_printer() instead.
+ * redirect to the tweaked xe_tile_dbg_printer() instead.
*/
- dbg = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, NULL);
+ dbg = xe_tile_dbg_printer((gt)->tile);
dbg.origin = p->origin;
- drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf);
+ drm_printf(&dbg, __XE_GT_PRINTK_FMT(gt, "%pV", vaf));
}
/**
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 4cae2d514306..6344b5205c08 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -1478,23 +1478,16 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
return 0;
xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M);
- bo = xe_bo_create_locked(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_NEEDS_2M |
- XE_BO_FLAG_PINNED |
- XE_BO_FLAG_PINNED_LATE_RESTORE);
+ bo = xe_bo_create_pin_range_novm(xe, tile,
+ ALIGN(size, PAGE_SIZE), 0, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_NEEDS_2M |
+ XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_PINNED_LATE_RESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
- err = xe_bo_pin(bo);
- xe_bo_unlock(bo);
- if (unlikely(err)) {
- xe_bo_put(bo);
- return err;
- }
-
config->lmem_obj = bo;
if (xe_device_has_lmtt(xe)) {
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
index c712111aa30d..44cc612b0a75 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -55,12 +55,12 @@ static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
xe_gt_assert(gt, size % sizeof(u32) == 0);
xe_gt_assert(gt, size == ndwords * sizeof(u32));
- bo = xe_bo_create_pin_map(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ bo = xe_bo_create_pin_map_novm(xe, tile,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE, false);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -91,12 +91,12 @@ static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
xe_gt_assert(gt, size % sizeof(u32) == 0);
xe_gt_assert(gt, size == ndwords * sizeof(u32));
- bo = xe_bo_create_pin_map(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ bo = xe_bo_create_pin_map_novm(xe, tile,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE, false);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
index 30f942671c2b..5f74706bab81 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.c
+++ b/drivers/gpu/drm/xe/xe_gt_stats.c
@@ -26,11 +26,46 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
atomic64_add(incr, &gt->stats.counters[id]);
}
+#define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name
+
static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
- "svm_pagefault_count",
- "tlb_inval_count",
- "vma_pagefault_count",
- "vma_pagefault_kb",
+ DEF_STAT_STR(SVM_PAGEFAULT_COUNT, "svm_pagefault_count"),
+ DEF_STAT_STR(TLB_INVAL, "tlb_inval_count"),
+ DEF_STAT_STR(SVM_TLB_INVAL_COUNT, "svm_tlb_inval_count"),
+ DEF_STAT_STR(SVM_TLB_INVAL_US, "svm_tlb_inval_us"),
+ DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"),
+ DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"),
+ DEF_STAT_STR(SVM_4K_PAGEFAULT_COUNT, "svm_4K_pagefault_count"),
+ DEF_STAT_STR(SVM_64K_PAGEFAULT_COUNT, "svm_64K_pagefault_count"),
+ DEF_STAT_STR(SVM_2M_PAGEFAULT_COUNT, "svm_2M_pagefault_count"),
+ DEF_STAT_STR(SVM_4K_VALID_PAGEFAULT_COUNT, "svm_4K_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_64K_VALID_PAGEFAULT_COUNT, "svm_64K_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_2M_VALID_PAGEFAULT_COUNT, "svm_2M_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_4K_PAGEFAULT_US, "svm_4K_pagefault_us"),
+ DEF_STAT_STR(SVM_64K_PAGEFAULT_US, "svm_64K_pagefault_us"),
+ DEF_STAT_STR(SVM_2M_PAGEFAULT_US, "svm_2M_pagefault_us"),
+ DEF_STAT_STR(SVM_4K_MIGRATE_COUNT, "svm_4K_migrate_count"),
+ DEF_STAT_STR(SVM_64K_MIGRATE_COUNT, "svm_64K_migrate_count"),
+ DEF_STAT_STR(SVM_2M_MIGRATE_COUNT, "svm_2M_migrate_count"),
+ DEF_STAT_STR(SVM_4K_MIGRATE_US, "svm_4K_migrate_us"),
+ DEF_STAT_STR(SVM_64K_MIGRATE_US, "svm_64K_migrate_us"),
+ DEF_STAT_STR(SVM_2M_MIGRATE_US, "svm_2M_migrate_us"),
+ DEF_STAT_STR(SVM_DEVICE_COPY_US, "svm_device_copy_us"),
+ DEF_STAT_STR(SVM_4K_DEVICE_COPY_US, "svm_4K_device_copy_us"),
+ DEF_STAT_STR(SVM_64K_DEVICE_COPY_US, "svm_64K_device_copy_us"),
+ DEF_STAT_STR(SVM_2M_DEVICE_COPY_US, "svm_2M_device_copy_us"),
+ DEF_STAT_STR(SVM_CPU_COPY_US, "svm_cpu_copy_us"),
+ DEF_STAT_STR(SVM_4K_CPU_COPY_US, "svm_4K_cpu_copy_us"),
+ DEF_STAT_STR(SVM_64K_CPU_COPY_US, "svm_64K_cpu_copy_us"),
+ DEF_STAT_STR(SVM_2M_CPU_COPY_US, "svm_2M_cpu_copy_us"),
+ DEF_STAT_STR(SVM_DEVICE_COPY_KB, "svm_device_copy_kb"),
+ DEF_STAT_STR(SVM_CPU_COPY_KB, "svm_cpu_copy_kb"),
+ DEF_STAT_STR(SVM_4K_GET_PAGES_US, "svm_4K_get_pages_us"),
+ DEF_STAT_STR(SVM_64K_GET_PAGES_US, "svm_64K_get_pages_us"),
+ DEF_STAT_STR(SVM_2M_GET_PAGES_US, "svm_2M_get_pages_us"),
+ DEF_STAT_STR(SVM_4K_BIND_US, "svm_4K_bind_us"),
+ DEF_STAT_STR(SVM_64K_BIND_US, "svm_64K_bind_us"),
+ DEF_STAT_STR(SVM_2M_BIND_US, "svm_2M_bind_us"),
};
/**
@@ -50,3 +85,17 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p)
return 0;
}
+
+/**
+ * xe_gt_stats_clear - Clear the GT stats
+ * @gt: GT structure
+ *
+ * Zero every counter in the GT's stats counter array.
+ */
+void xe_gt_stats_clear(struct xe_gt *gt)
+{
+	int i = 0;
+
+	while (i < ARRAY_SIZE(gt->stats.counters)) {
+		atomic64_set(&gt->stats.counters[i], 0);
+		i++;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h
index 38325ef53617..e8aea32bc971 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats.h
@@ -13,6 +13,7 @@ struct drm_printer;
#ifdef CONFIG_DEBUG_FS
int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
+void xe_gt_stats_clear(struct xe_gt *gt);
void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr);
#else
static inline void
diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
index be3244d7133c..d8348a8de2e1 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
@@ -9,8 +9,41 @@
enum xe_gt_stats_id {
XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT,
XE_GT_STATS_ID_TLB_INVAL,
+ XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT,
+ XE_GT_STATS_ID_SVM_TLB_INVAL_US,
XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT,
XE_GT_STATS_ID_VMA_PAGEFAULT_KB,
+ XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_4K_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_64K_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_2M_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_4K_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_64K_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_2M_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_4K_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_64K_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_2M_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_4K_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_64K_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_2M_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_DEVICE_COPY_KB,
+ XE_GT_STATS_ID_SVM_CPU_COPY_KB,
+ XE_GT_STATS_ID_SVM_4K_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_64K_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_2M_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_4K_BIND_US,
+ XE_GT_STATS_ID_SVM_64K_BIND_US,
+ XE_GT_STATS_ID_SVM_2M_BIND_US,
/* must be the last entry */
__XE_GT_STATS_NUM_IDS,
};
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index a0baa560dd71..4e61c5e39bcb 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -12,6 +12,7 @@
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_gt.h"
+#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_mmio.h"
#include "xe_wa.h"
@@ -122,6 +123,21 @@ gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
}
}
+/**
+ * xe_gt_topology_report_l3 - Check whether the L3 bank mask should be reported
+ * @gt: GT structure
+ *
+ * Return: false for a media GT with media version 30 or newer, true otherwise.
+ */
+bool xe_gt_topology_report_l3(struct xe_gt *gt)
+{
+	/*
+	 * No known userspace needs/uses the L3 bank mask reported by
+	 * the media GT, and the hardware itself is known to report bogus
+	 * values on several platforms. The mask stays part of the media
+	 * GT's topology on pre-Xe3 platforms only because that is already
+	 * part of our ABI.
+	 */
+	return !(xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) >= 30);
+}
+
static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
@@ -129,16 +145,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
struct xe_mmio *mmio = &gt->mmio;
u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3);
- /*
- * PTL platforms with media version 30.00 do not provide proper values
- * for the media GT's L3 bank registers. Skip the readout since we
- * don't have any way to obtain real values.
- *
- * This may get re-described as an official workaround in the future,
- * but there's no tracking number assigned yet so we use a custom
- * OOB workaround descriptor.
- */
- if (XE_GT_WA(gt, no_media_l3))
+ if (!xe_gt_topology_report_l3(gt))
return;
if (GRAPHICS_VER(xe) >= 30) {
@@ -275,8 +282,9 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "EU type: %s\n",
eu_type_to_str(gt->fuse_topo.eu_type));
- drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
- gt->fuse_topo.l3_bank_mask);
+ if (xe_gt_topology_report_l3(gt))
+ drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
+ gt->fuse_topo.l3_bank_mask);
}
/*
@@ -328,3 +336,19 @@ bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
{
return test_bit(dss, gt->fuse_topo.c_dss_mask);
}
+
+/**
+ * xe_gt_has_discontiguous_dss_groups - Check for gaps in DSS steering groups
+ * @gt: GT structure
+ *
+ * Walk the DSS steering targets and compare each group ID with the one seen
+ * previously, starting from -1.
+ *
+ * Return: true if a group ID exceeds the previously seen one by more than 1,
+ * false otherwise.
+ */
+bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt)
+{
+	u16 group, instance;
+	unsigned int xecore;
+	int prev = -1;
+
+	for_each_dss_steering(xecore, gt, group, instance) {
+		/* prev starts at -1, so a first group above 0 also counts as a gap */
+		if (group - prev > 1)
+			return true;
+
+		prev = group;
+	}
+
+	return false;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index c8140704ad4c..5e62f5949b7b 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -47,4 +47,8 @@ xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss);
bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss);
+bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt);
+
+bool xe_gt_topology_report_l3(struct xe_gt *gt);
+
#endif /* _XE_GT_TOPOLOGY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index fb7bcb9185b7..00789844ea4d 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -74,8 +74,7 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc)
if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
flags |= GUC_LOG_DISABLED;
else
- flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) <<
- GUC_LOG_VERBOSITY_SHIFT;
+ flags |= FIELD_PREP(GUC_LOG_VERBOSITY, GUC_LOG_LEVEL_TO_VERBOSITY(level));
return flags;
}
@@ -122,22 +121,14 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
- BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
- (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
- BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
- (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
- BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
- (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
-
flags = GUC_LOG_VALID |
GUC_LOG_NOTIFY_ON_HALF_FULL |
CAPTURE_FLAG |
LOG_FLAG |
- ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
- ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
- ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) <<
- GUC_LOG_CAPTURE_SHIFT) |
- (offset << GUC_LOG_BUF_ADDR_SHIFT);
+ FIELD_PREP(GUC_LOG_CRASH, CRASH_BUFFER_SIZE / LOG_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_DEBUG, DEBUG_BUFFER_SIZE / LOG_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_CAPTURE, CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_BUF_ADDR, offset);
#undef LOG_UNIT
#undef LOG_FLAG
@@ -150,7 +141,7 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
static u32 guc_ctl_ads_flags(struct xe_guc *guc)
{
u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT;
- u32 flags = ads << GUC_ADS_ADDR_SHIFT;
+ u32 flags = FIELD_PREP(GUC_ADS_ADDR, ads);
return flags;
}
@@ -709,10 +700,6 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc)
if (ret)
return ret;
- ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo);
- if (ret)
- return ret;
-
return 0;
}
@@ -847,6 +834,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
if (ret)
return ret;
+ ret = xe_guc_ct_init_post_hwconfig(&guc->ct);
+ if (ret)
+ return ret;
+
guc_init_params_post_hwconfig(guc);
ret = xe_guc_submit_init(guc, ~0);
@@ -1064,7 +1055,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
#endif
#define GUC_LOAD_TIME_WARN_MS 200
-static void guc_wait_ucode(struct xe_guc *guc)
+static int guc_wait_ucode(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
struct xe_mmio *mmio = &gt->mmio;
@@ -1171,7 +1162,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
break;
}
- xe_device_declare_wedged(gt_to_xe(gt));
+ return -EPROTO;
} else if (delta_ms > GUC_LOAD_TIME_WARN_MS) {
xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n",
delta_ms, status, count);
@@ -1183,7 +1174,10 @@ static void guc_wait_ucode(struct xe_guc *guc)
delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc),
before_freq, status, count);
}
+
+ return 0;
}
+ALLOW_ERROR_INJECTION(guc_wait_ucode, ERRNO);
static int __xe_guc_upload(struct xe_guc *guc)
{
@@ -1215,14 +1209,16 @@ static int __xe_guc_upload(struct xe_guc *guc)
goto out;
/* Wait for authentication */
- guc_wait_ucode(guc);
+ ret = guc_wait_ucode(guc);
+ if (ret)
+ goto out;
xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING);
return 0;
out:
xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
- return 0 /* FIXME: ret, don't want to stop load currently */;
+ return ret;
}
static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc)
@@ -1693,3 +1689,7 @@ void xe_guc_declare_wedged(struct xe_guc *guc)
xe_guc_ct_stop(&guc->ct);
xe_guc_submit_wedge(guc);
}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_guc_g2g_test.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 22cf019a11bf..1cca05967e62 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -53,6 +53,10 @@ void xe_guc_stop(struct xe_guc *guc);
int xe_guc_start(struct xe_guc *guc);
void xe_guc_declare_wedged(struct xe_guc *guc);
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len);
+#endif
+
static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
{
switch (class) {
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 5631722f34f5..58e0b0294a5b 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -339,7 +339,7 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
if (XE_GT_WA(gt, 13011645652)) {
u32 data = 0xC40;
- guc_waklv_enable(ads, &data, sizeof(data) / sizeof(u32), &offset, &remain,
+ guc_waklv_enable(ads, &data, 1, &offset, &remain,
GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE);
}
@@ -355,7 +355,7 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
0x0,
0xF,
};
- guc_waklv_enable(ads, data, sizeof(data) / sizeof(u32), &offset, &remain,
+ guc_waklv_enable(ads, data, ARRAY_SIZE(data), &offset, &remain,
GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 848065a25c44..18f6327bf552 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -39,6 +39,8 @@ static void receive_g2h(struct xe_guc_ct *ct);
static void g2h_worker_func(struct work_struct *w);
static void safe_mode_worker_func(struct work_struct *w);
static void ct_exit_safe_mode(struct xe_guc_ct *ct);
+static void guc_ct_change_state(struct xe_guc_ct *ct,
+ enum xe_guc_ct_state state);
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
enum {
@@ -252,6 +254,13 @@ int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct)
}
ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */
+/* devm action callback: move the CT passed in @arg to the disabled state */
+static void guc_action_disable_ct(void *arg)
+{
+	guc_ct_change_state(arg, XE_GUC_CT_STATE_DISABLED);
+}
+
int xe_guc_ct_init(struct xe_guc_ct *ct)
{
struct xe_device *xe = ct_to_xe(ct);
@@ -268,10 +277,39 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
return PTR_ERR(bo);
ct->bo = bo;
- return 0;
+
+ return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct);
}
ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
+/**
+ * xe_guc_ct_init_post_hwconfig - Reinitialize the GuC CTB in VRAM
+ * @ct: the &xe_guc_ct
+ *
+ * On DGFX products, reinitialize the CTB BO in VRAM through
+ * xe_managed_bo_reinit_in_vram(); other products keep their BO. In both
+ * cases the CT-disable devm action is removed and registered again.
+ * The CT must not be enabled when this is called.
+ *
+ * Return: 0 on success, or a negative errno on failure.
+ */
+int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_tile *tile = gt_to_tile(ct_to_gt(ct));
+	struct device *dev = xe->drm.dev;
+	int err;
+
+	xe_assert(xe, !xe_guc_ct_enabled(ct));
+
+	if (IS_DGFX(xe)) {
+		err = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * NOTE(review): presumably re-registered so the disable action is
+	 * ordered after the BO reinit in the devres list - confirm.
+	 */
+	devm_remove_action(dev, guc_action_disable_ct, ct);
+
+	return devm_add_action_or_reset(dev, guc_action_disable_ct, ct);
+}
+
#define desc_read(xe_, guc_ctb__, field_) \
xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
struct guc_ct_buffer_desc, field_)
@@ -1040,11 +1078,15 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
return true;
}
+#define GUC_SEND_RETRY_LIMIT 50
+#define GUC_SEND_RETRY_MSLEEP 5
+
static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 *response_buffer, bool no_fail)
{
struct xe_gt *gt = ct_to_gt(ct);
struct g2h_fence g2h_fence;
+ unsigned int retries = 0;
int ret = 0;
/*
@@ -1109,6 +1151,12 @@ retry_same_fence:
xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
action[0], g2h_fence.reason);
mutex_unlock(&ct->lock);
+ if (++retries > GUC_SEND_RETRY_LIMIT) {
+ xe_gt_err(gt, "H2G action %#x reached retry limit=%u, aborting\n",
+ action[0], GUC_SEND_RETRY_LIMIT);
+ return -ELOOP;
+ }
+ msleep(GUC_SEND_RETRY_MSLEEP * retries);
goto retry;
}
if (g2h_fence.fail) {
@@ -1438,6 +1486,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
case XE_GUC_ACTION_NOTIFY_EXCEPTION:
ret = guc_crash_process_msg(ct, action);
break;
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+ case XE_GUC_ACTION_TEST_G2G_RECV:
+ ret = xe_guc_g2g_test_notification(guc, payload, adj_len);
+ break;
+#endif
default:
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 18d4225e6502..cf41210ab30a 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -13,6 +13,7 @@ struct xe_device;
int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct);
int xe_guc_ct_init(struct xe_guc_ct *ct);
+int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct);
int xe_guc_ct_enable(struct xe_guc_ct *ct);
void xe_guc_ct_disable(struct xe_guc_ct *ct);
void xe_guc_ct_stop(struct xe_guc_ct *ct);
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c
index 92e1f9f41b8c..2b99c1ebdd58 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c
@@ -94,16 +94,17 @@ static int allocate_engine_activity_buffers(struct xe_guc *guc,
struct xe_tile *tile = gt_to_tile(gt);
struct xe_bo *bo, *metadata_bo;
- metadata_bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(metadata_size),
- ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE);
+ metadata_bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(metadata_size),
+ ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE,
+ false);
if (IS_ERR(metadata_bo))
return PTR_ERR(metadata_bo);
- bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(size),
- ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE);
+ bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(size),
+ ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, false);
if (IS_ERR(bo)) {
xe_bo_unpin_map_no_vm(metadata_bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 0508f1064178..50c4c2406132 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -15,6 +15,7 @@
#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4
#define G2H_LEN_DW_DEREGISTER_CONTEXT 3
#define G2H_LEN_DW_TLB_INVALIDATE 3
+#define G2H_LEN_DW_G2G_NOTIFY_MIN 3
#define GUC_ID_MAX 65535
#define GUC_ID_UNKNOWN 0xffffffff
@@ -65,6 +66,7 @@ struct guc_ctxt_registration_info {
u32 hwlrca_hi;
};
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
+#define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1)
/* 32-bit KLV structure as used by policy updates and others */
struct guc_klv_generic_dw_t {
@@ -89,13 +91,10 @@ struct guc_update_exec_queue_policy {
#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1)
#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2)
#define GUC_LOG_LOG_ALLOC_UNITS BIT(3)
-#define GUC_LOG_CRASH_SHIFT 4
-#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT)
-#define GUC_LOG_DEBUG_SHIFT 6
-#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT)
-#define GUC_LOG_CAPTURE_SHIFT 10
-#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT)
-#define GUC_LOG_BUF_ADDR_SHIFT 12
+#define GUC_LOG_CRASH REG_GENMASK(5, 4)
+#define GUC_LOG_DEBUG REG_GENMASK(9, 6)
+#define GUC_LOG_CAPTURE REG_GENMASK(11, 10)
+#define GUC_LOG_BUF_ADDR REG_GENMASK(31, 12)
#define GUC_CTL_WA 1
#define GUC_WA_GAM_CREDITS BIT(10)
@@ -117,21 +116,14 @@ struct guc_update_exec_queue_policy {
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
#define GUC_CTL_DEBUG 3
-#define GUC_LOG_VERBOSITY_SHIFT 0
-#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_MIN 0
+#define GUC_LOG_VERBOSITY REG_GENMASK(1, 0)
#define GUC_LOG_VERBOSITY_MAX 3
-#define GUC_LOG_VERBOSITY_MASK 0x0000000f
-#define GUC_LOG_DESTINATION_MASK (3 << 4)
-#define GUC_LOG_DISABLED (1 << 6)
-#define GUC_PROFILE_ENABLED (1 << 7)
+#define GUC_LOG_DESTINATION REG_GENMASK(5, 4)
+#define GUC_LOG_DISABLED BIT(6)
+#define GUC_PROFILE_ENABLED BIT(7)
#define GUC_CTL_ADS 4
-#define GUC_ADS_ADDR_SHIFT 1
-#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT)
+#define GUC_ADS_ADDR REG_GENMASK(21, 1)
#define GUC_CTL_DEVID 5
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
index f1e2b0be90a9..98a47ac42b08 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.h
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -17,7 +17,7 @@ struct xe_device;
#define DEBUG_BUFFER_SIZE SZ_8M
#define CAPTURE_BUFFER_SIZE SZ_2M
#else
-#define CRASH_BUFFER_SIZE SZ_8K
+#define CRASH_BUFFER_SIZE SZ_16K
#define DEBUG_BUFFER_SIZE SZ_64K
#define CAPTURE_BUFFER_SIZE SZ_1M
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 88557e86d637..53fdf59524c4 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -79,6 +79,11 @@
* Xe driver enables SLPC with all of its defaults features and frequency
* selection, which varies per platform.
*
+ * Power profiles add another level of control to SLPC. When power saving
+ * profile is chosen, SLPC will use conservative thresholds to ramp frequency,
+ * thus saving power. Base profile is default and ensures balanced performance
+ * for any workload.
+ *
* Render-C States:
* ================
*
@@ -1171,6 +1176,61 @@ static int pc_action_set_strategy(struct xe_guc_pc *pc, u32 val)
return ret;
}
+/* Map the cached pc->power_profile value to its profile name string */
+static const char *power_profile_to_string(struct xe_guc_pc *pc)
+{
+	u32 profile = pc->power_profile;
+
+	if (profile == SLPC_POWER_PROFILE_BASE)
+		return "base";
+
+	if (profile == SLPC_POWER_PROFILE_POWER_SAVING)
+		return "power_saving";
+
+	return "invalid";
+}
+
+/**
+ * xe_guc_pc_get_power_profile - Format the list of SLPC power profiles
+ * @pc: Xe_GuC_PC instance
+ * @profile: output buffer; must be large enough for the formatted string
+ *
+ * Write both profile names into @profile with the currently cached profile
+ * in square brackets, or "invalid" if the cached value is not recognized.
+ * Every output is newline-terminated.
+ */
+void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile)
+{
+	switch (pc->power_profile) {
+	case SLPC_POWER_PROFILE_BASE:
+		sprintf(profile, "[%s] %s\n", "base", "power_saving");
+		break;
+	case SLPC_POWER_PROFILE_POWER_SAVING:
+		sprintf(profile, "%s [%s]\n", "base", "power_saving");
+		break;
+	default:
+		/* Newline-terminate like the recognized cases above */
+		sprintf(profile, "invalid\n");
+		break;
+	}
+}
+
+/**
+ * xe_guc_pc_set_power_profile - Select the SLPC power profile
+ * @pc: Xe_GuC_PC instance
+ * @buf: profile name, "base" or "power_saving"
+ *
+ * Only the leading characters of @buf are compared against each name, so
+ * anything following a recognized name (e.g. a trailing newline) is ignored.
+ * The cached pc->power_profile is updated only when setting the SLPC
+ * parameter succeeds.
+ *
+ * Return: 0 on success, -EINVAL for an unrecognized name, or the error
+ * returned by pc_action_set_param().
+ */
+int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf)
+{
+	u32 profile;
+	int err;
+
+	if (!strncmp("base", buf, strlen("base")))
+		profile = SLPC_POWER_PROFILE_BASE;
+	else if (!strncmp("power_saving", buf, strlen("power_saving")))
+		profile = SLPC_POWER_PROFILE_POWER_SAVING;
+	else
+		return -EINVAL;
+
+	/* freq_lock is dropped automatically when the function returns */
+	guard(mutex)(&pc->freq_lock);
+	xe_pm_runtime_get_noresume(pc_to_xe(pc));
+
+	err = pc_action_set_param(pc, SLPC_PARAM_POWER_PROFILE, profile);
+	if (!err)
+		pc->power_profile = profile;
+	else
+		xe_gt_err_once(pc_to_gt(pc), "Failed to set power profile to %d: %pe\n",
+			       profile, ERR_PTR(err));
+
+	xe_pm_runtime_put(pc_to_xe(pc));
+
+	return err;
+}
+
/**
* xe_guc_pc_start - Start GuC's Power Conservation component
* @pc: Xe_GuC_PC instance
@@ -1249,6 +1309,11 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
/* Enable SLPC Optimized Strategy for compute */
ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
+ /* Set cached value of power_profile */
+ ret = xe_guc_pc_set_power_profile(pc, power_profile_to_string(pc));
+ if (unlikely(ret))
+ xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret));
+
out:
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return ret;
@@ -1327,6 +1392,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
pc->bo = bo;
+ pc->power_profile = SLPC_POWER_PROFILE_BASE;
+
return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 52ecdd5ddbff..0e31396f103c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -31,6 +31,8 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq);
int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq);
int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq);
int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq);
+int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf);
+void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile);
enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
index c02053948a57..5e4ea53fbee6 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -37,6 +37,8 @@ struct xe_guc_pc {
struct mutex freq_lock;
/** @freq_ready: Only handle freq changes, if they are really ready */
bool freq_ready;
+ /** @power_profile: Base or power_saving profile */
+ u32 power_profile;
};
#endif /* _XE_GUC_PC_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index e377ba3a39b3..53024eb5670b 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -355,7 +355,7 @@ static int guc_init_global_schedule_policy(struct xe_guc *guc)
ret = xe_guc_ct_send_block(&guc->ct, data, count);
if (ret < 0) {
xe_gt_err(guc_to_gt(guc),
- "failed to enable GuC sheduling policies: %pe\n",
+ "failed to enable GuC scheduling policies: %pe\n",
ERR_PTR(ret));
return ret;
}
@@ -624,10 +624,8 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
info.engine_submit_mask = q->logical_mask;
info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
- info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
-
- if (ctx_type != GUC_CONTEXT_NORMAL)
- info.flags |= BIT(ctx_type);
+ info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
+ FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
if (xe_exec_queue_is_parallel(q)) {
u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
@@ -2105,7 +2103,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
if (unlikely(!q)) {
- xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id);
+ xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
return NULL;
}
@@ -2604,7 +2602,7 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
}
/**
- * xe_guc_register_exec_queue - Register exec queue for a given context type.
+ * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
* @q: Execution queue
* @ctx_type: Type of the context
*
@@ -2615,15 +2613,17 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
*
* Returns - None.
*/
-void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type)
+void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
- xe_assert(xe, IS_SRIOV_VF(xe));
- xe_assert(xe, !IS_DGFX(xe));
- xe_assert(xe, (ctx_type > GUC_CONTEXT_NORMAL &&
- ctx_type < GUC_CONTEXT_COUNT));
+ xe_gt_assert(gt, IS_SRIOV_VF(xe));
+ xe_gt_assert(gt, !IS_DGFX(xe));
+ xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
+ ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
+ xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
register_exec_queue(q, ctx_type);
enable_scheduling(q);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index e20ccafdfab5..78c3f07e31a0 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -48,7 +48,7 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
void
xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot);
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
-void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type);
+void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type);
int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch);
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
deleted file mode 100644
index 57b71956ddf4..000000000000
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ /dev/null
@@ -1,325 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2024 Intel Corporation
- */
-
-#include <linux/scatterlist.h>
-#include <linux/mmu_notifier.h>
-#include <linux/dma-mapping.h>
-#include <linux/memremap.h>
-#include <linux/swap.h>
-#include <linux/hmm.h>
-#include <linux/mm.h>
-#include "xe_hmm.h"
-#include "xe_vm.h"
-#include "xe_bo.h"
-
-static u64 xe_npages_in_range(unsigned long start, unsigned long end)
-{
- return (end - start) >> PAGE_SHIFT;
-}
-
-static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
- struct hmm_range *range, struct rw_semaphore *notifier_sem)
-{
- unsigned long i, npages, hmm_pfn;
- unsigned long num_chunks = 0;
- int ret;
-
- /* HMM docs says this is needed. */
- ret = down_read_interruptible(notifier_sem);
- if (ret)
- return ret;
-
- if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
- up_read(notifier_sem);
- return -EAGAIN;
- }
-
- npages = xe_npages_in_range(range->start, range->end);
- for (i = 0; i < npages;) {
- unsigned long len;
-
- hmm_pfn = range->hmm_pfns[i];
- xe_assert(xe, hmm_pfn & HMM_PFN_VALID);
-
- len = 1UL << hmm_pfn_to_map_order(hmm_pfn);
-
- /* If order > 0 the page may extend beyond range->start */
- len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1);
- i += len;
- num_chunks++;
- }
- up_read(notifier_sem);
-
- return sg_alloc_table(st, num_chunks, GFP_KERNEL);
-}
-
-/**
- * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
- * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table
- * and will be used to program GPU page table later.
- * @xe: the xe device who will access the dma-address in sg table
- * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
- * has the pfn numbers of pages that back up this hmm address range.
- * @st: pointer to the sg table.
- * @notifier_sem: The xe notifier lock.
- * @write: whether we write to this range. This decides dma map direction
- * for system pages. If write we map it bi-diretional; otherwise
- * DMA_TO_DEVICE
- *
- * All the contiguous pfns will be collapsed into one entry in
- * the scatter gather table. This is for the purpose of efficiently
- * programming GPU page table.
- *
- * The dma_address in the sg table will later be used by GPU to
- * access memory. So if the memory is system memory, we need to
- * do a dma-mapping so it can be accessed by GPU/DMA.
- *
- * FIXME: This function currently only support pages in system
- * memory. If the memory is GPU local memory (of the GPU who
- * is going to access memory), we need gpu dpa (device physical
- * address), and there is no need of dma-mapping. This is TBD.
- *
- * FIXME: dma-mapping for peer gpu device to access remote gpu's
- * memory. Add this when you support p2p
- *
- * This function allocates the storage of the sg table. It is
- * caller's responsibility to free it calling sg_free_table.
- *
- * Returns 0 if successful; -ENOMEM if fails to allocate memory
- */
-static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
- struct sg_table *st,
- struct rw_semaphore *notifier_sem,
- bool write)
-{
- unsigned long npages = xe_npages_in_range(range->start, range->end);
- struct device *dev = xe->drm.dev;
- struct scatterlist *sgl;
- struct page *page;
- unsigned long i, j;
-
- lockdep_assert_held(notifier_sem);
-
- i = 0;
- for_each_sg(st->sgl, sgl, st->nents, j) {
- unsigned long hmm_pfn, size;
-
- hmm_pfn = range->hmm_pfns[i];
- page = hmm_pfn_to_page(hmm_pfn);
- xe_assert(xe, !is_device_private_page(page));
-
- size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
- size -= page_to_pfn(page) & (size - 1);
- i += size;
-
- if (unlikely(j == st->nents - 1)) {
- xe_assert(xe, i >= npages);
- if (i > npages)
- size -= (i - npages);
-
- sg_mark_end(sgl);
- } else {
- xe_assert(xe, i < npages);
- }
-
- sg_set_page(sgl, page, size << PAGE_SHIFT, 0);
- }
-
- return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
-}
-
-static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
- struct xe_vm *vm = xe_vma_vm(&uvma->vma);
-
- lockdep_assert_held_write(&vm->lock);
- lockdep_assert_held(&vm->userptr.notifier_lock);
-
- mutex_lock(&userptr->unmap_mutex);
- xe_assert(vm->xe, !userptr->mapped);
- userptr->mapped = true;
- mutex_unlock(&userptr->unmap_mutex);
-}
-
-void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
- struct xe_vma *vma = &uvma->vma;
- bool write = !xe_vma_read_only(vma);
- struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_device *xe = vm->xe;
-
- if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) &&
- !lockdep_is_held_type(&vm->lock, 0) &&
- !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
- /* Don't unmap in exec critical section. */
- xe_vm_assert_held(vm);
- /* Don't unmap while mapping the sg. */
- lockdep_assert_held(&vm->lock);
- }
-
- mutex_lock(&userptr->unmap_mutex);
- if (userptr->sg && userptr->mapped)
- dma_unmap_sgtable(xe->drm.dev, userptr->sg,
- write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
- userptr->mapped = false;
- mutex_unlock(&userptr->unmap_mutex);
-}
-
-/**
- * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr
- * @uvma: the userptr vma which hold the scatter gather table
- *
- * With function xe_userptr_populate_range, we allocate storage of
- * the userptr sg table. This is a helper function to free this
- * sg table, and dma unmap the address in the table.
- */
-void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
-
- xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg);
- xe_hmm_userptr_unmap(uvma);
- sg_free_table(userptr->sg);
- userptr->sg = NULL;
-}
-
-/**
- * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual
- * address range
- *
- * @uvma: userptr vma which has information of the range to populate.
- * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller.
- *
- * This function populate the physical pages of a virtual
- * address range. The populated physical pages is saved in
- * userptr's sg table. It is similar to get_user_pages but call
- * hmm_range_fault.
- *
- * This function also read mmu notifier sequence # (
- * mmu_interval_read_begin), for the purpose of later
- * comparison (through mmu_interval_read_retry).
- *
- * This must be called with mmap read or write lock held.
- *
- * This function allocates the storage of the userptr sg table.
- * It is caller's responsibility to free it calling sg_free_table.
- *
- * returns: 0 for success; negative error no on failure
- */
-int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
- bool is_mm_mmap_locked)
-{
- unsigned long timeout =
- jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- unsigned long *pfns;
- struct xe_userptr *userptr;
- struct xe_vma *vma = &uvma->vma;
- u64 userptr_start = xe_vma_userptr(vma);
- u64 userptr_end = userptr_start + xe_vma_size(vma);
- struct xe_vm *vm = xe_vma_vm(vma);
- struct hmm_range hmm_range = {
- .pfn_flags_mask = 0, /* ignore pfns */
- .default_flags = HMM_PFN_REQ_FAULT,
- .start = userptr_start,
- .end = userptr_end,
- .notifier = &uvma->userptr.notifier,
- .dev_private_owner = vm->xe,
- };
- bool write = !xe_vma_read_only(vma);
- unsigned long notifier_seq;
- u64 npages;
- int ret;
-
- userptr = &uvma->userptr;
-
- if (is_mm_mmap_locked)
- mmap_assert_locked(userptr->notifier.mm);
-
- if (vma->gpuva.flags & XE_VMA_DESTROYED)
- return 0;
-
- notifier_seq = mmu_interval_read_begin(&userptr->notifier);
- if (notifier_seq == userptr->notifier_seq)
- return 0;
-
- if (userptr->sg)
- xe_hmm_userptr_free_sg(uvma);
-
- npages = xe_npages_in_range(userptr_start, userptr_end);
- pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
- if (unlikely(!pfns))
- return -ENOMEM;
-
- if (write)
- hmm_range.default_flags |= HMM_PFN_REQ_WRITE;
-
- if (!mmget_not_zero(userptr->notifier.mm)) {
- ret = -EFAULT;
- goto free_pfns;
- }
-
- hmm_range.hmm_pfns = pfns;
-
- while (true) {
- hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier);
-
- if (!is_mm_mmap_locked)
- mmap_read_lock(userptr->notifier.mm);
-
- ret = hmm_range_fault(&hmm_range);
-
- if (!is_mm_mmap_locked)
- mmap_read_unlock(userptr->notifier.mm);
-
- if (ret == -EBUSY) {
- if (time_after(jiffies, timeout))
- break;
-
- continue;
- }
- break;
- }
-
- mmput(userptr->notifier.mm);
-
- if (ret)
- goto free_pfns;
-
- ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock);
- if (ret)
- goto free_pfns;
-
- ret = down_read_interruptible(&vm->userptr.notifier_lock);
- if (ret)
- goto free_st;
-
- if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) {
- ret = -EAGAIN;
- goto out_unlock;
- }
-
- ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt,
- &vm->userptr.notifier_lock, write);
- if (ret)
- goto out_unlock;
-
- userptr->sg = &userptr->sgt;
- xe_hmm_userptr_set_mapped(uvma);
- userptr->notifier_seq = hmm_range.notifier_seq;
- up_read(&vm->userptr.notifier_lock);
- kvfree(pfns);
- return 0;
-
-out_unlock:
- up_read(&vm->userptr.notifier_lock);
-free_st:
- sg_free_table(&userptr->sgt);
-free_pfns:
- kvfree(pfns);
- return ret;
-}
diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h
deleted file mode 100644
index 0ea98d8e7bbc..000000000000
--- a/drivers/gpu/drm/xe/xe_hmm.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Copyright © 2024 Intel Corporation
- */
-
-#ifndef _XE_HMM_H_
-#define _XE_HMM_H_
-
-#include <linux/types.h>
-
-struct xe_userptr_vma;
-
-int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked);
-
-void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma);
-
-void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma);
-#endif
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 5ade08f90b89..b6790589e623 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -1297,13 +1297,6 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
xe_hwmon_fan_input_read(hwmon, channel, &fan_speed);
}
-static void xe_hwmon_mutex_destroy(void *arg)
-{
- struct xe_hwmon *hwmon = arg;
-
- mutex_destroy(&hwmon->hwmon_lock);
-}
-
int xe_hwmon_register(struct xe_device *xe)
{
struct device *dev = xe->drm.dev;
@@ -1322,8 +1315,7 @@ int xe_hwmon_register(struct xe_device *xe)
if (!hwmon)
return -ENOMEM;
- mutex_init(&hwmon->hwmon_lock);
- ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon);
+ ret = devm_mutex_init(dev, &hwmon->hwmon_lock);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c
index 044dda517b7c..48dfcb41fa08 100644
--- a/drivers/gpu/drm/xe/xe_i2c.c
+++ b/drivers/gpu/drm/xe/xe_i2c.c
@@ -259,7 +259,7 @@ void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold)
return;
if (d3cold)
- xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY);
+ xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0);
drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR));
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c
new file mode 100644
index 000000000000..38f3feb2aecd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+
+#include <drm/drm_managed.h>
+#include <drm/intel/i915_component.h>
+#include <drm/intel/intel_lb_mei_interface.h>
+#include <drm/drm_print.h>
+
+#include "xe_device.h"
+#include "xe_late_bind_fw.h"
+#include "xe_pcode.h"
+#include "xe_pcode_api.h"
+#include "xe_pm.h"
+
+/*
+ * The component should load quite quickly in most cases, but it could take
+ * a bit. Using a very big timeout just to cover the worst case scenario
+ */
+#define LB_INIT_TIMEOUT_MS 20000
+
+/*
+ * Retry for up to 6 seconds in total, in steps of 200 ms, to allow time for
+ * other OS components to release the MEI CL handle
+ */
+#define LB_FW_LOAD_RETRY_MAXCOUNT 30
+#define LB_FW_LOAD_RETRY_PAUSE_MS 200
+
+/* Firmware type stored in lb_fw->type and handed to push_payload(), indexed by firmware id. */
+static const u32 fw_id_to_type[] = {
+ [XE_LB_FW_FAN_CONTROL] = INTEL_LB_TYPE_FAN_CONTROL,
+ };
+
+/* Firmware name used in log messages and in the blob path, indexed by firmware id. */
+static const char * const fw_id_to_name[] = {
+ [XE_LB_FW_FAN_CONTROL] = "fan_control",
+ };
+
+/* Resolve the xe_device that embeds @late_bind as its late_bind member. */
+static struct xe_device *late_bind_to_xe(struct xe_late_bind *late_bind)
+{
+	return container_of(late_bind, struct xe_device, late_bind);
+}
+
+/*
+ * Resolve the xe_device that embeds @lb_fw, using lb_fw->id as its index in
+ * the late_bind.late_bind_fw[] array.
+ */
+static struct xe_device *late_bind_fw_to_xe(struct xe_late_bind_fw *lb_fw)
+{
+	return container_of(lb_fw, struct xe_device, late_bind.late_bind_fw[lb_fw->id]);
+}
+
+/*
+ * Refer to the "Late Bind based Firmware Layout" documentation entry for details
+ *
+ * Validate the CPD header found at @data, look up the entry named
+ * @manifest_entry and store the manifest's fw_version in lb_fw->version.
+ *
+ * Return: 0 on success, -ENOENT if @data is too small for a CPD header or the
+ * header marker does not match, -EINVAL on a bad header length, -ENODATA if
+ * the blob is truncated or the manifest entry is not found.
+ */
+static int parse_cpd_header(struct xe_late_bind_fw *lb_fw,
+			    const void *data, size_t size, const char *manifest_entry)
+{
+	struct xe_device *xe = late_bind_fw_to_xe(lb_fw);
+	const struct gsc_cpd_header_v2 *header = data;
+	const struct gsc_manifest_header *manifest;
+	const struct gsc_cpd_entry *entry;
+	size_t min_size = sizeof(*header);
+	/* Must start at 0: it is only assigned when an entry name matches */
+	u32 offset = 0;
+	int i;
+
+	/* manifest_entry is mandatory */
+	xe_assert(xe, manifest_entry);
+
+	if (size < min_size || header->header_marker != GSC_CPD_HEADER_MARKER)
+		return -ENOENT;
+
+	if (header->header_length < sizeof(struct gsc_cpd_header_v2)) {
+		drm_err(&xe->drm, "%s late binding fw: Invalid CPD header length %u!\n",
+			fw_id_to_name[lb_fw->id], header->header_length);
+		return -EINVAL;
+	}
+
+	/* The entry table follows the header; make sure all of it is present */
+	min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries;
+	if (size < min_size) {
+		drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+			fw_id_to_name[lb_fw->id], size, min_size);
+		return -ENODATA;
+	}
+
+	/* Look for the manifest first */
+	entry = (void *)header + header->header_length;
+	for (i = 0; i < header->num_of_entries; i++, entry++)
+		if (strcmp(entry->name, manifest_entry) == 0)
+			offset = entry->offset & GSC_CPD_ENTRY_OFFSET_MASK;
+
+	if (!offset) {
+		drm_err(&xe->drm, "%s late binding fw: Failed to find manifest_entry\n",
+			fw_id_to_name[lb_fw->id]);
+		return -ENODATA;
+	}
+
+	min_size = offset + sizeof(struct gsc_manifest_header);
+	if (size < min_size) {
+		drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+			fw_id_to_name[lb_fw->id], size, min_size);
+		return -ENODATA;
+	}
+
+	manifest = data + offset;
+
+	lb_fw->version = manifest->fw_version;
+
+	return 0;
+}
+
+/*
+ * Refer to the "Late Bind based Firmware Layout" documentation entry for details
+ *
+ * Validate the FPT header at the start of @data, look up the entry named
+ * @fpt_entry and parse the CPD header it points to via parse_cpd_header().
+ *
+ * Return: 0 on success, -ENOENT if @data is too small for an FPT header or the
+ * header marker does not match, -EINVAL on a bad header length, -ENODATA if
+ * the blob is truncated or the FPT entry is not found, or the error returned
+ * by parse_cpd_header().
+ */
+static int parse_lb_layout(struct xe_late_bind_fw *lb_fw,
+			   const void *data, size_t size, const char *fpt_entry)
+{
+	struct xe_device *xe = late_bind_fw_to_xe(lb_fw);
+	const struct csc_fpt_header *header = data;
+	const struct csc_fpt_entry *entry;
+	size_t min_size = sizeof(*header);
+	/* Must start at 0: it is only assigned when an entry name matches */
+	u32 offset = 0;
+	int i;
+
+	/* fpt_entry is mandatory */
+	xe_assert(xe, fpt_entry);
+
+	if (size < min_size || header->header_marker != CSC_FPT_HEADER_MARKER)
+		return -ENOENT;
+
+	if (header->header_length < sizeof(struct csc_fpt_header)) {
+		drm_err(&xe->drm, "%s late binding fw: Invalid FPT header length %u!\n",
+			fw_id_to_name[lb_fw->id], header->header_length);
+		return -EINVAL;
+	}
+
+	/* The entry table follows the header; make sure all of it is present */
+	min_size = header->header_length + sizeof(struct csc_fpt_entry) * header->num_of_entries;
+	if (size < min_size) {
+		drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+			fw_id_to_name[lb_fw->id], size, min_size);
+		return -ENODATA;
+	}
+
+	/* Look for the cpd header first */
+	entry = (void *)header + header->header_length;
+	for (i = 0; i < header->num_of_entries; i++, entry++)
+		if (strcmp(entry->name, fpt_entry) == 0)
+			offset = entry->offset;
+
+	if (!offset) {
+		drm_err(&xe->drm, "%s late binding fw: Failed to find fpt_entry\n",
+			fw_id_to_name[lb_fw->id]);
+		return -ENODATA;
+	}
+
+	min_size = offset + sizeof(struct gsc_cpd_header_v2);
+	if (size < min_size) {
+		drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+			fw_id_to_name[lb_fw->id], size, min_size);
+		return -ENODATA;
+	}
+
+	return parse_cpd_header(lb_fw, data + offset, size - offset, "LTES.man");
+}
+
+/* Translate an INTEL_LB_STATUS_* code into a short string for log messages. */
+static const char *xe_late_bind_parse_status(uint32_t status)
+{
+	const char *desc;
+
+	switch (status) {
+	case INTEL_LB_STATUS_SUCCESS:
+		desc = "success";
+		break;
+	case INTEL_LB_STATUS_4ID_MISMATCH:
+		desc = "4Id Mismatch";
+		break;
+	case INTEL_LB_STATUS_ARB_FAILURE:
+		desc = "ARB Failure";
+		break;
+	case INTEL_LB_STATUS_GENERAL_ERROR:
+		desc = "General Error";
+		break;
+	case INTEL_LB_STATUS_INVALID_PARAMS:
+		desc = "Invalid Params";
+		break;
+	case INTEL_LB_STATUS_INVALID_SIGNATURE:
+		desc = "Invalid Signature";
+		break;
+	case INTEL_LB_STATUS_INVALID_PAYLOAD:
+		desc = "Invalid Payload";
+		break;
+	case INTEL_LB_STATUS_TIMEOUT:
+		desc = "Timeout";
+		break;
+	default:
+		desc = "Unknown error";
+		break;
+	}
+
+	return desc;
+}
+
+/*
+ * Read the FSC_READ_NUM_FANS value from pcode on the root tile.
+ * Returns the value read, or 0 if the pcode read fails.
+ */
+static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind)
+{
+	struct xe_device *xe = late_bind_to_xe(late_bind);
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+	u32 uval;
+	int err;
+
+	err = xe_pcode_read(root_tile,
+			    PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), &uval, NULL);
+	if (err)
+		return 0;
+
+	return uval;
+}
+
+/**
+ * xe_late_bind_wait_for_worker_completion() - flush pending firmware load work
+ * @late_bind: pointer to late bind structure.
+ *
+ * Flushes the load worker of every firmware that still has a payload, provided
+ * the late bind workqueue exists.
+ */
+void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind)
+{
+	struct xe_device *xe = late_bind_to_xe(late_bind);
+	int fw_id;
+
+	for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+		struct xe_late_bind_fw *lbfw = &late_bind->late_bind_fw[fw_id];
+
+		/* Nothing was ever queued without a payload or a workqueue */
+		if (!lbfw->payload || !late_bind->wq)
+			continue;
+
+		drm_dbg(&xe->drm, "Flush work: load %s firmware\n",
+			fw_id_to_name[lbfw->id]);
+		flush_work(&lbfw->work);
+	}
+}
+
+/*
+ * Worker that pushes one late binding firmware payload to the MEI component.
+ *
+ * Waits up to LB_INIT_TIMEOUT_MS for the component to be bound, then calls
+ * push_payload(), retrying up to LB_FW_LOAD_RETRY_MAXCOUNT times while it
+ * returns -EBUSY. On any failure the payload is freed so the load is not
+ * attempted again. The runtime PM reference taken by xe_late_bind_fw_load()
+ * before queueing is dropped on every exit path.
+ */
+static void xe_late_bind_work(struct work_struct *work)
+{
+	struct xe_late_bind_fw *lbfw = container_of(work, struct xe_late_bind_fw, work);
+	struct xe_late_bind *late_bind = container_of(lbfw, struct xe_late_bind,
+						      late_bind_fw[lbfw->id]);
+	struct xe_device *xe = late_bind_to_xe(late_bind);
+	int retry = LB_FW_LOAD_RETRY_MAXCOUNT;
+	int ret;
+	int slept;
+
+	xe_device_assert_mem_access(xe);
+
+	/* we can queue this before the component is bound */
+	for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) {
+		if (late_bind->component.ops)
+			break;
+		msleep(100);
+	}
+
+	if (!late_bind->component.ops) {
+		drm_err(&xe->drm, "Late bind component not bound\n");
+		/* Do not re-attempt fw load */
+		drmm_kfree(&xe->drm, (void *)lbfw->payload);
+		lbfw->payload = NULL;
+		goto out;
+	}
+
+	drm_dbg(&xe->drm, "Load %s firmware\n", fw_id_to_name[lbfw->id]);
+
+	do {
+		ret = late_bind->component.ops->push_payload(late_bind->component.mei_dev,
+							     lbfw->type,
+							     lbfw->flags,
+							     lbfw->payload,
+							     lbfw->payload_size);
+		/* Only -EBUSY is retried; don't sleep for any other result */
+		if (ret != -EBUSY)
+			break;
+		msleep(LB_FW_LOAD_RETRY_PAUSE_MS);
+	} while (--retry);
+
+	if (!ret) {
+		drm_dbg(&xe->drm, "Load %s firmware successful\n",
+			fw_id_to_name[lbfw->id]);
+		goto out;
+	}
+
+	/* Positive values are decoded as INTEL_LB_STATUS_* codes, negative as errnos */
+	if (ret > 0)
+		drm_err(&xe->drm, "Load %s firmware failed with err %d, %s\n",
+			fw_id_to_name[lbfw->id], ret, xe_late_bind_parse_status(ret));
+	else
+		drm_err(&xe->drm, "Load %s firmware failed with err %d\n",
+			fw_id_to_name[lbfw->id], ret);
+	/* Do not re-attempt fw load */
+	drmm_kfree(&xe->drm, (void *)lbfw->payload);
+	lbfw->payload = NULL;
+
+out:
+	xe_pm_runtime_put(xe);
+}
+
+/**
+ * xe_late_bind_fw_load() - queue the upload of all available late binding fw
+ * @late_bind: pointer to late bind structure.
+ *
+ * For every firmware that has a payload, takes a runtime PM reference and
+ * queues its load worker; the worker drops that reference when it finishes.
+ *
+ * Return: 0 on success (also when loading is disabled), -ENODEV if the late
+ * bind component has not been added.
+ */
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind)
+{
+	struct xe_device *xe = late_bind_to_xe(late_bind);
+	int fw_id;
+
+	if (!late_bind->component_added)
+		return -ENODEV;
+
+	if (late_bind->disable)
+		return 0;
+
+	for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+		struct xe_late_bind_fw *lbfw = &late_bind->late_bind_fw[fw_id];
+
+		if (!lbfw->payload)
+			continue;
+
+		/*
+		 * NOTE(review): the return value of queue_work() is ignored; if
+		 * the work is already pending this reference looks unbalanced -
+		 * confirm callers cannot reach here with the work still queued.
+		 */
+		xe_pm_runtime_get_noresume(xe);
+		queue_work(late_bind->wq, &lbfw->work);
+	}
+
+	return 0;
+}
+
+/*
+ * Prepare late binding firmware @fw_id: request the blob for this device,
+ * validate its size and layout, and keep a drm-managed copy in lb_fw->payload
+ * for the load worker.
+ *
+ * A missing blob, or a fan control firmware on a device reporting no fans, is
+ * not an error: 0 is returned and lb_fw->payload is left unset.
+ *
+ * Return: 0 on success or when there is nothing to load, -EINVAL for an
+ * invalid @fw_id, -ENODATA if the blob is too large, -ENOMEM on allocation
+ * failure, or the error returned by parse_lb_layout().
+ */
+static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
+{
+	struct xe_device *xe = late_bind_to_xe(late_bind);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct xe_late_bind_fw *lb_fw;
+	const struct firmware *fw;
+	u32 num_fans;
+	int ret;
+
+	if (fw_id >= XE_LB_FW_MAX_ID)
+		return -EINVAL;
+
+	lb_fw = &late_bind->late_bind_fw[fw_id];
+
+	lb_fw->id = fw_id;
+	lb_fw->type = fw_id_to_type[lb_fw->id];
+	lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT;
+
+	if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) {
+		num_fans = xe_late_bind_fw_num_fans(late_bind);
+		drm_dbg(&xe->drm, "Number of Fans: %u\n", num_fans);
+		if (!num_fans)
+			return 0;
+	}
+
+	snprintf(lb_fw->blob_path, sizeof(lb_fw->blob_path), "xe/%s_8086_%04x_%04x_%04x.bin",
+		 fw_id_to_name[lb_fw->id], pdev->device,
+		 pdev->subsystem_vendor, pdev->subsystem_device);
+
+	drm_dbg(&xe->drm, "Request late binding firmware %s\n", lb_fw->blob_path);
+	ret = firmware_request_nowarn(&fw, lb_fw->blob_path, xe->drm.dev);
+	if (ret) {
+		drm_dbg(&xe->drm, "%s late binding fw not available for current device\n",
+			fw_id_to_name[lb_fw->id]);
+		return 0;
+	}
+
+	if (fw->size > XE_LB_MAX_PAYLOAD_SIZE) {
+		drm_err(&xe->drm, "Firmware %s size %zu is larger than max pay load size %u\n",
+			lb_fw->blob_path, fw->size, XE_LB_MAX_PAYLOAD_SIZE);
+		release_firmware(fw);
+		return -ENODATA;
+	}
+
+	ret = parse_lb_layout(lb_fw, fw->data, fw->size, "LTES");
+	if (ret) {
+		/* The blob is still held here; drop it before bailing out */
+		release_firmware(fw);
+		return ret;
+	}
+
+	lb_fw->payload_size = fw->size;
+	lb_fw->payload = drmm_kzalloc(&xe->drm, lb_fw->payload_size, GFP_KERNEL);
+	if (!lb_fw->payload) {
+		release_firmware(fw);
+		return -ENOMEM;
+	}
+
+	drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u.%u\n",
+		 fw_id_to_name[lb_fw->id], lb_fw->blob_path,
+		 lb_fw->version.major, lb_fw->version.minor,
+		 lb_fw->version.hotfix, lb_fw->version.build);
+
+	/* Keep a private copy so the firmware blob can be released right away */
+	memcpy((void *)lb_fw->payload, fw->data, lb_fw->payload_size);
+	release_firmware(fw);
+	INIT_WORK(&lb_fw->work, xe_late_bind_work);
+
+	return 0;
+}
+
+/*
+ * Allocate the ordered late bind workqueue and initialize every firmware id,
+ * stopping at the first failure.
+ *
+ * Returns 0 on success, -ENOMEM if the workqueue cannot be allocated, or the
+ * first error returned by __xe_late_bind_fw_init().
+ */
+static int xe_late_bind_fw_init(struct xe_late_bind *late_bind)
+{
+	int err = 0;
+	int id;
+
+	late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0);
+	if (!late_bind->wq)
+		return -ENOMEM;
+
+	for (id = 0; !err && id < XE_LB_FW_MAX_ID; id++)
+		err = __xe_late_bind_fw_init(late_bind, id);
+
+	return err;
+}
+
+/*
+ * Component bind callback: record the ops (@data) and the MEI device so the
+ * load worker can call push_payload(). Always returns 0.
+ */
+static int xe_late_bind_component_bind(struct device *xe_kdev,
+ struct device *mei_kdev, void *data)
+{
+ struct xe_device *xe = kdev_to_xe_device(xe_kdev);
+ struct xe_late_bind *late_bind = &xe->late_bind;
+
+ late_bind->component.ops = data;
+ late_bind->component.mei_dev = mei_kdev;
+
+ return 0;
+}
+
+/*
+ * Component unbind callback: flush any pending load work before clearing the
+ * ops pointer that the worker dereferences.
+ */
+static void xe_late_bind_component_unbind(struct device *xe_kdev,
+ struct device *mei_kdev, void *data)
+{
+ struct xe_device *xe = kdev_to_xe_device(xe_kdev);
+ struct xe_late_bind *late_bind = &xe->late_bind;
+
+ xe_late_bind_wait_for_worker_completion(late_bind);
+
+ late_bind->component.ops = NULL;
+}
+
+/* Bind/unbind callbacks passed to component_add_typed() and component_del(). */
+static const struct component_ops xe_late_bind_component_ops = {
+ .bind = xe_late_bind_component_bind,
+ .unbind = xe_late_bind_component_unbind,
+};
+
+/*
+ * Teardown action registered by xe_late_bind_init(): flush pending load work,
+ * remove the component and destroy the workqueue if one was allocated.
+ */
+static void xe_late_bind_remove(void *arg)
+{
+ struct xe_late_bind *late_bind = arg;
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+
+ xe_late_bind_wait_for_worker_completion(late_bind);
+
+ /* Makes xe_late_bind_fw_load() return -ENODEV from now on */
+ late_bind->component_added = false;
+
+ component_del(xe->drm.dev, &xe_late_bind_component_ops);
+ if (late_bind->wq) {
+ destroy_workqueue(late_bind->wq);
+ late_bind->wq = NULL;
+ }
+}
+
+/**
+ * xe_late_bind_init() - add xe mei late binding component
+ * @late_bind: pointer to late bind structure.
+ *
+ * Does nothing and returns 0 when the device has no late bind support or the
+ * MEI late bind/GSC drivers are not enabled in the kernel configuration.
+ *
+ * Return: 0 if the initialization was successful, a negative errno otherwise.
+ */
+int xe_late_bind_init(struct xe_late_bind *late_bind)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ int err;
+
+ if (!xe->info.has_late_bind)
+ return 0;
+
+ if (!IS_ENABLED(CONFIG_INTEL_MEI_LB) || !IS_ENABLED(CONFIG_INTEL_MEI_GSC)) {
+ drm_info(&xe->drm, "Can't init xe mei late bind missing mei component\n");
+ return 0;
+ }
+
+ err = component_add_typed(xe->drm.dev, &xe_late_bind_component_ops,
+ INTEL_COMPONENT_LB);
+ if (err < 0) {
+ drm_err(&xe->drm, "Failed to add mei late bind component (%pe)\n", ERR_PTR(err));
+ return err;
+ }
+
+ late_bind->component_added = true;
+
+ /* Registered before the workqueue exists; xe_late_bind_remove() copes with a NULL wq */
+ err = devm_add_action_or_reset(xe->drm.dev, xe_late_bind_remove, late_bind);
+ if (err)
+ return err;
+
+ err = xe_late_bind_fw_init(late_bind);
+ if (err)
+ return err;
+
+ return xe_late_bind_fw_load(late_bind);
+}
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.h
new file mode 100644
index 000000000000..07e437390539
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_LATE_BIND_FW_H_
+#define _XE_LATE_BIND_FW_H_
+
+#include <linux/types.h>
+
+struct xe_late_bind;
+
+int xe_late_bind_init(struct xe_late_bind *late_bind);
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind);
+void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
new file mode 100644
index 000000000000..0f5da89ce98b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_LATE_BIND_TYPES_H_
+#define _XE_LATE_BIND_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include "xe_uc_fw_abi.h"
+
+#define XE_LB_MAX_PAYLOAD_SIZE SZ_4K
+
+/**
+ * enum xe_late_bind_fw_id - enum to determine late binding fw index
+ * @XE_LB_FW_FAN_CONTROL: fan control firmware
+ * @XE_LB_FW_MAX_ID: number of firmware ids; used as array size and upper bound
+ */
+enum xe_late_bind_fw_id {
+ XE_LB_FW_FAN_CONTROL = 0,
+ XE_LB_FW_MAX_ID
+};
+
+/**
+ * struct xe_late_bind_fw - state of a single late binding firmware
+ */
+struct xe_late_bind_fw {
+ /** @id: firmware index */
+ u32 id;
+ /** @blob_path: firmware binary path */
+ char blob_path[PATH_MAX];
+ /** @type: firmware type */
+ u32 type;
+ /** @flags: firmware flags */
+ u32 flags;
+ /** @payload: to store the late binding blob */
+ const u8 *payload;
+ /** @payload_size: late binding blob payload_size */
+ size_t payload_size;
+ /** @work: worker to upload latebind blob */
+ struct work_struct work;
+ /** @version: late binding blob manifest version */
+ struct gsc_version version;
+};
+
+/**
+ * struct xe_late_bind_component - Late Binding services component
+ * @mei_dev: device that provides the Late Binding service.
+ * @ops: Ops implemented by the Late Binding driver, used by the Xe driver.
+ *
+ * Communication between Xe and MEI drivers for Late Binding services
+ */
+struct xe_late_bind_component {
+ struct device *mei_dev;
+ const struct intel_lb_component_ops *ops;
+};
+
+/**
+ * struct xe_late_bind - late binding state embedded in the xe device
+ */
+struct xe_late_bind {
+ /** @component: struct for communication with mei component */
+ struct xe_late_bind_component component;
+ /** @late_bind_fw: late binding firmware array */
+ struct xe_late_bind_fw late_bind_fw[XE_LB_FW_MAX_ID];
+ /** @wq: workqueue to submit request to download late bind blob */
+ struct workqueue_struct *wq;
+ /** @component_added: whether the component has been added */
+ bool component_added;
+ /** @disable: to block late binding reload during pm resume flow */
+ bool disable;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index f2bfbfa3efa1..62fc5a1a332d 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -67,12 +67,12 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
goto out;
}
- bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL,
- PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
- lmtt->ops->lmtt_pte_num(level)),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
- XE_BO_FLAG_NEEDS_64K);
+ bo = xe_bo_create_pin_map_novm(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt),
+ PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+ lmtt->ops->lmtt_pte_num(level)),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
+ XE_BO_FLAG_NEEDS_64K, false);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_free_pt;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 8f6c3ba47882..47e9df775072 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -8,6 +8,7 @@
#include <generated/xe_wa_oob.h>
#include <linux/ascii85.h>
+#include <linux/panic.h>
#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
@@ -16,6 +17,7 @@
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
@@ -75,11 +77,17 @@ lrc_to_xe(struct xe_lrc *lrc)
static bool
gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
{
+ struct xe_device *xe = gt_to_xe(gt);
+
if (XE_GT_WA(gt, 16010904313) &&
(class == XE_ENGINE_CLASS_RENDER ||
class == XE_ENGINE_CLASS_COMPUTE))
return true;
+ if (xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
+ class, NULL))
+ return true;
+
return false;
}
@@ -1102,6 +1110,64 @@ static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
return cmd - batch;
}
+/*
+ * Copy the batch returned by xe_configfs_get_ctx_restore_post_bb() for this
+ * engine class into @batch.
+ *
+ * Returns the number of dwords written, 0 when no batch is returned, or
+ * -ENOSPC when the batch does not fit in @max_len dwords.
+ */
+static ssize_t setup_configfs_post_ctx_restore_bb(struct xe_lrc *lrc,
+						  struct xe_hw_engine *hwe,
+						  u32 *batch, size_t max_len)
+{
+	struct xe_device *xe = gt_to_xe(lrc->gt);
+	const u32 *user_batch;
+	u32 count;
+
+	count = xe_configfs_get_ctx_restore_post_bb(to_pci_dev(xe->drm.dev),
+						    hwe->class, &user_batch);
+	if (!count)
+		return 0;
+	if (count > max_len)
+		return -ENOSPC;
+
+	/*
+	 * This should be used only for tests and validation. Taint the kernel
+	 * as anything could be submitted directly in context switches
+	 */
+	add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
+
+	memcpy(batch, user_batch, count * sizeof(u32));
+
+	return count;
+}
+
+/*
+ * Copy the batch returned by xe_configfs_get_ctx_restore_mid_bb() for this
+ * engine class into @batch.
+ *
+ * Returns the number of dwords written, 0 when no batch is returned, or
+ * -ENOSPC when the batch does not fit in @max_len dwords.
+ */
+static ssize_t setup_configfs_mid_ctx_restore_bb(struct xe_lrc *lrc,
+						 struct xe_hw_engine *hwe,
+						 u32 *batch, size_t max_len)
+{
+	struct xe_device *xe = gt_to_xe(lrc->gt);
+	const u32 *user_batch;
+	u32 count;
+
+	count = xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
+						   hwe->class, &user_batch);
+	if (!count)
+		return 0;
+	if (count > max_len)
+		return -ENOSPC;
+
+	/*
+	 * This should be used only for tests and validation. Taint the kernel
+	 * as anything could be submitted directly in context switches
+	 */
+	add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
+
+	memcpy(batch, user_batch, count * sizeof(u32));
+
+	return count;
+}
+
static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
struct xe_hw_engine *hwe,
u32 *batch, size_t max_len)
@@ -1203,6 +1269,7 @@ int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe
{ .setup = setup_timestamp_wa },
{ .setup = setup_invalidate_state_cache_wa },
{ .setup = setup_utilization_wa },
+ { .setup = setup_configfs_post_ctx_restore_bb },
};
struct bo_setup_state state = {
.lrc = lrc,
@@ -1249,8 +1316,12 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
static int
setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
- static struct bo_setup rcs_funcs[] = {
+ static const struct bo_setup rcs_funcs[] = {
{ .setup = setup_timestamp_wa },
+ { .setup = setup_configfs_mid_ctx_restore_bb },
+ };
+ static const struct bo_setup xcs_funcs[] = {
+ { .setup = setup_configfs_mid_ctx_restore_bb },
};
struct bo_setup_state state = {
.lrc = lrc,
@@ -1268,6 +1339,9 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
hwe->class == XE_ENGINE_CLASS_COMPUTE) {
state.funcs = rcs_funcs;
state.num_funcs = ARRAY_SIZE(rcs_funcs);
+ } else {
+ state.funcs = xcs_funcs;
+ state.num_funcs = ARRAY_SIZE(xcs_funcs);
}
if (xe_gt_WARN_ON(lrc->gt, !state.funcs))
@@ -1294,14 +1368,15 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
finish_bo(&state);
kfree(state.buffer);
+ /*
+ * Enable INDIRECT_CTX leaving INDIRECT_CTX_OFFSET at its default: it
+ * varies per engine class, but the default is good enough
+ */
xe_lrc_write_ctx_reg(lrc,
CTX_CS_INDIRECT_CTX,
(xe_bo_ggtt_addr(lrc->bo) + state.offset) |
/* Size in CLs. */
(state.written * sizeof(u32) / 64));
- xe_lrc_write_ctx_reg(lrc,
- CTX_CS_INDIRECT_CTX_OFFSET,
- CTX_INDIRECT_CTX_OFFSET_DEFAULT);
return 0;
}
@@ -1340,9 +1415,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
if (vm && vm->xef) /* userspace */
bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
- lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
- ttm_bo_type_kernel,
- bo_flags);
+ lrc->bo = xe_bo_create_pin_map_novm(xe, tile,
+ bo_size,
+ ttm_bo_type_kernel,
+ bo_flags, false);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 9643442ef101..1d667fa36cf3 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -35,6 +35,7 @@
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
+#include "xe_validation.h"
#include "xe_vm.h"
#include "xe_vram.h"
@@ -173,7 +174,7 @@ static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm,
}
static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
- struct xe_vm *vm)
+ struct xe_vm *vm, struct drm_exec *exec)
{
struct xe_device *xe = tile_to_xe(tile);
u16 pat_index = xe->pat.idx[XE_CACHE_WB];
@@ -200,7 +201,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
num_entries * XE_PAGE_SIZE,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_PAGETABLE);
+ XE_BO_FLAG_PAGETABLE, exec);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -393,6 +394,24 @@ struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile)
return m;
}
+/*
+ * Lock @vm through drm_exec inside a validation guard and run
+ * xe_migrate_prepare_vm() with that exec context. Returns the final value of
+ * err left by the guarded section.
+ */
+static int xe_migrate_lock_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_vm *vm)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ int err = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
+ /*
+ * NOTE(review): the value returned by the lock call is overwritten by
+ * the next assignment to err without being checked - confirm that only
+ * contention (handled by the retry below) can make it fail.
+ */
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ err = xe_migrate_prepare_vm(tile, m, vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ }
+
+ return err;
+}
+
/**
* xe_migrate_init() - Initialize a migrate context
* @m: The migration context
@@ -413,11 +432,9 @@ int xe_migrate_init(struct xe_migrate *m)
if (IS_ERR(vm))
return PTR_ERR(vm);
- xe_vm_lock(vm, false);
- err = xe_migrate_prepare_vm(tile, m, vm);
- xe_vm_unlock(vm);
+ err = xe_migrate_lock_prepare_vm(tile, m, vm);
if (err)
- goto err_out;
+ return err;
if (xe->info.has_usm) {
struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
@@ -842,11 +859,15 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0,
&src_L0_ofs, &src_L0_pt, 0, 0,
avail_pts);
-
- pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
- batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0,
- &dst_L0_ofs, &dst_L0_pt, 0,
- avail_pts, avail_pts);
+ if (copy_only_ccs) {
+ dst_L0_ofs = src_L0_ofs;
+ } else {
+ pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
+ batch_size += pte_update_size(m, pte_flags, dst,
+ &dst_it, &src_L0,
+ &dst_L0_ofs, &dst_L0_pt,
+ 0, avail_pts, avail_pts);
+ }
if (copy_system_ccs) {
xe_assert(xe, type_device);
@@ -876,7 +897,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
xe_res_next(&dst_it, src_L0);
- else
+ else if (!copy_only_ccs)
emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs,
&dst_it, src_L0, dst);
@@ -908,7 +929,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (!fence) {
err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv,
DMA_RESV_USAGE_BOOKKEEP);
- if (!err && src_bo != dst_bo)
+ if (!err && src_bo->ttm.base.resv != dst_bo->ttm.base.resv)
err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv,
DMA_RESV_USAGE_BOOKKEEP);
if (err)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index a188bad172ad..a4894eb0d7f3 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -883,9 +883,9 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
{
struct xe_bo *bo;
- bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
- size, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile,
+ size, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 046d330bad34..be91343829dd 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -334,6 +334,7 @@ static const struct xe_device_desc bmg_desc = {
.has_mbx_power_limits = true,
.has_gsc_nvm = 1,
.has_heci_cscfi = 1,
+ .has_late_bind = true,
.has_sriov = true,
.max_gt_per_tile = 2,
.needs_scratch = true,
@@ -510,6 +511,26 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
*revid = REG_FIELD_GET(GMD_ID_REVID, val);
}
+/*
+ * Look up the graphics IP descriptor whose verx100 equals @verx100.
+ * Returns NULL when graphics_ips[] has no matching entry.
+ */
+static const struct xe_ip *find_graphics_ip(unsigned int verx100)
+{
+	const struct xe_ip *ip;
+
+	KUNIT_STATIC_STUB_REDIRECT(find_graphics_ip, verx100);
+
+	for (ip = graphics_ips; ip < graphics_ips + ARRAY_SIZE(graphics_ips); ip++) {
+		if (ip->verx100 == verx100)
+			return ip;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up the media IP descriptor whose verx100 equals @verx100.
+ * Returns NULL when media_ips[] has no matching entry.
+ */
+static const struct xe_ip *find_media_ip(unsigned int verx100)
+{
+	const struct xe_ip *ip;
+
+	KUNIT_STATIC_STUB_REDIRECT(find_media_ip, verx100);
+
+	for (ip = media_ips; ip < media_ips + ARRAY_SIZE(media_ips); ip++) {
+		if (ip->verx100 == verx100)
+			return ip;
+	}
+
+	return NULL;
+}
+
/*
* Read IP version from hardware and select graphics/media IP descriptors
* based on the result.
@@ -527,14 +548,7 @@ static void handle_gmdid(struct xe_device *xe,
read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid);
- for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) {
- if (ver == graphics_ips[i].verx100) {
- *graphics_ip = &graphics_ips[i];
-
- break;
- }
- }
-
+ *graphics_ip = find_graphics_ip(ver);
if (!*graphics_ip) {
drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n",
ver / 100, ver % 100);
@@ -545,14 +559,7 @@ static void handle_gmdid(struct xe_device *xe,
if (ver == 0)
return;
- for (int i = 0; i < ARRAY_SIZE(media_ips); i++) {
- if (ver == media_ips[i].verx100) {
- *media_ip = &media_ips[i];
-
- break;
- }
- }
-
+ *media_ip = find_media_ip(ver);
if (!*media_ip) {
drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n",
ver / 100, ver % 100);
@@ -581,6 +588,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.has_gsc_nvm = desc->has_gsc_nvm;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
xe->info.has_heci_cscfi = desc->has_heci_cscfi;
+ xe->info.has_late_bind = desc->has_late_bind;
xe->info.has_llc = desc->has_llc;
xe->info.has_pxp = desc->has_pxp;
xe->info.has_sriov = desc->has_sriov;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index b63002fc0f67..9b9766a3baa3 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -39,6 +39,7 @@ struct xe_device_desc {
u8 has_gsc_nvm:1;
u8 has_heci_gscfi:1;
u8 has_heci_cscfi:1;
+ u8 has_late_bind:1;
u8 has_llc:1;
u8 has_mbx_power_limits:1;
u8 has_pxp:1;
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 6eea4190bbd2..d6625c71115b 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -21,6 +21,7 @@
#include "xe_gt_idle.h"
#include "xe_i2c.h"
#include "xe_irq.h"
+#include "xe_late_bind_fw.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_sriov_vf_ccs.h"
@@ -129,6 +130,8 @@ int xe_pm_suspend(struct xe_device *xe)
if (err)
goto err;
+ xe_late_bind_wait_for_worker_completion(&xe->late_bind);
+
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
@@ -213,9 +216,11 @@ int xe_pm_resume(struct xe_device *xe)
xe_pxp_pm_resume(xe->pxp);
- if (IS_SRIOV_VF(xe))
+ if (IS_VF_CCS_READY(xe))
xe_sriov_vf_ccs_register_context(xe);
+ xe_late_bind_fw_load(&xe->late_bind);
+
drm_dbg(&xe->drm, "Device resumed\n");
return 0;
err:
@@ -598,9 +603,12 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe_pxp_pm_resume(xe->pxp);
- if (IS_SRIOV_VF(xe))
+ if (IS_VF_CCS_READY(xe))
xe_sriov_vf_ccs_register_context(xe);
+ if (xe->d3cold.allowed)
+ xe_late_bind_fw_load(&xe->late_bind);
+
out:
xe_rpm_lockmap_release(xe);
xe_pm_write_callback_task(xe, NULL);
diff --git a/drivers/gpu/drm/xe/xe_printk.h b/drivers/gpu/drm/xe/xe_printk.h
new file mode 100644
index 000000000000..c5be2385aa95
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_printk.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PRINTK_H_
+#define _XE_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+
+#define __XE_PRINTK_FMT(_xe, _fmt, _args...) _fmt, ##_args
+
+#define xe_printk(_xe, _level, _fmt, ...) \
+ drm_##_level(&(_xe)->drm, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_err(_xe, _fmt, ...) \
+ xe_printk((_xe), err, _fmt, ##__VA_ARGS__)
+
+#define xe_err_once(_xe, _fmt, ...) \
+ xe_printk((_xe), err_once, _fmt, ##__VA_ARGS__)
+
+#define xe_err_ratelimited(_xe, _fmt, ...) \
+ xe_printk((_xe), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_warn(_xe, _fmt, ...) \
+ xe_printk((_xe), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_notice(_xe, _fmt, ...) \
+ xe_printk((_xe), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_info(_xe, _fmt, ...) \
+ xe_printk((_xe), info, _fmt, ##__VA_ARGS__)
+
+#define xe_dbg(_xe, _fmt, ...) \
+ xe_printk((_xe), dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_WARN_type(_xe, _type, _condition, _fmt, ...) \
+ drm_WARN##_type(&(_xe)->drm, _condition, _fmt, ## __VA_ARGS__)
+
+#define xe_WARN(_xe, _condition, _fmt, ...) \
+ xe_WARN_type((_xe),, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_WARN_ONCE(_xe, _condition, _fmt, ...) \
+ xe_WARN_type((_xe), _ONCE, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_WARN_ON(_xe, _condition) \
+ xe_WARN((_xe), _condition, "%s(%s)", "WARN_ON", __stringify(_condition))
+
+#define xe_WARN_ON_ONCE(_xe, _condition) \
+ xe_WARN_ONCE((_xe), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition))
+
+static inline void __xe_printfn_err(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_device *xe = p->arg;
+
+ xe_err(xe, "%pV", vaf);
+}
+
+static inline void __xe_printfn_info(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_device *xe = p->arg;
+
+ xe_info(xe, "%pV", vaf);
+}
+
+static inline void __xe_printfn_dbg(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_device *xe = p->arg;
+ struct drm_printer ddp;
+
+ /*
+ * The original xe_dbg() callsite annotations are useless here,
+ * redirect to the tweaked drm_dbg_printer() instead.
+ */
+ ddp = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
+ ddp.origin = p->origin;
+
+ drm_printf(&ddp, __XE_PRINTK_FMT(xe, "%pV", vaf));
+}
+
+/**
+ * xe_err_printer - Construct a &drm_printer that outputs to xe_err()
+ * @xe: the &xe_device pointer to use in xe_err()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_err_printer(struct xe_device *xe)
+{
+ struct drm_printer p = {
+ .printfn = __xe_printfn_err,
+ .arg = xe,
+ };
+ return p;
+}
+
+/**
+ * xe_info_printer - Construct a &drm_printer that outputs to xe_info()
+ * @xe: the &xe_device pointer to use in xe_info()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_info_printer(struct xe_device *xe)
+{
+ struct drm_printer p = {
+ .printfn = __xe_printfn_info,
+ .arg = xe,
+ };
+ return p;
+}
+
+/**
+ * xe_dbg_printer - Construct a &drm_printer that outputs like xe_dbg()
+ * @xe: the &xe_device pointer to use in xe_dbg()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_dbg_printer(struct xe_device *xe)
+{
+ struct drm_printer p = {
+ .printfn = __xe_printfn_dbg,
+ .arg = xe,
+ .origin = (const void *)_THIS_IP_,
+ };
+ return p;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_psmi.c b/drivers/gpu/drm/xe/xe_psmi.c
index a2c9ff5bfd59..45d142191d60 100644
--- a/drivers/gpu/drm/xe/xe_psmi.c
+++ b/drivers/gpu/drm/xe/xe_psmi.c
@@ -68,9 +68,7 @@ static void psmi_cleanup(struct xe_device *xe)
static struct xe_bo *psmi_alloc_object(struct xe_device *xe,
unsigned int id, size_t bo_size)
{
- struct xe_bo *bo = NULL;
struct xe_tile *tile;
- int err;
if (!id || !bo_size)
return NULL;
@@ -78,22 +76,12 @@ static struct xe_bo *psmi_alloc_object(struct xe_device *xe,
tile = &xe->tiles[id - 1];
/* VRAM: Allocate GEM object for the capture buffer */
- bo = xe_bo_create_locked(xe, tile, NULL, bo_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_PINNED |
- XE_BO_FLAG_PINNED_LATE_RESTORE |
- XE_BO_FLAG_NEEDS_CPU_ACCESS);
-
- if (!IS_ERR(bo)) {
- /* Buffer written by HW, ensure stays resident */
- err = xe_bo_pin(bo);
- if (err)
- bo = ERR_PTR(err);
- xe_bo_unlock(bo);
- }
-
- return bo;
+ return xe_bo_create_pin_range_novm(xe, tile, bo_size, 0, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_PINNED_LATE_RESTORE |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS);
}
/*
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index c129048a9a09..a1c88f9a6c76 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -13,17 +13,17 @@
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
-#include "xe_tlb_inval_job.h"
#include "xe_migrate.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_res_cursor.h"
#include "xe_sched_job.h"
-#include "xe_sync.h"
#include "xe_svm.h"
+#include "xe_sync.h"
#include "xe_tlb_inval_job.h"
#include "xe_trace.h"
#include "xe_ttm_stolen_mgr.h"
+#include "xe_userptr.h"
#include "xe_vm.h"
struct xe_pt_dir {
@@ -89,6 +89,7 @@ static void xe_pt_free(struct xe_pt *pt)
* @vm: The vm to create for.
* @tile: The tile to create for.
* @level: The page-table level.
+ * @exec: The drm_exec object used to lock the vm.
*
* Allocate and initialize a single struct xe_pt metadata structure. Also
* create the corresponding page-table bo, but don't initialize it. If the
@@ -100,7 +101,7 @@ static void xe_pt_free(struct xe_pt *pt)
* error.
*/
struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
- unsigned int level)
+ unsigned int level, struct drm_exec *exec)
{
struct xe_pt *pt;
struct xe_bo *bo;
@@ -124,9 +125,11 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
pt->level = level;
+
+ drm_WARN_ON(&vm->xe->drm, IS_ERR_OR_NULL(exec));
bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
ttm_bo_type_kernel,
- bo_flags);
+ bo_flags, exec);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto err_kfree;
@@ -590,7 +593,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
if (covers || !*child) {
u64 flags = 0;
- xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1);
+ xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1,
+ xe_vm_validation_exec(vm));
if (IS_ERR(xe_child))
return PTR_ERR(xe_child);
@@ -729,7 +733,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
return -EAGAIN;
}
if (xe_svm_range_has_dma_mapping(range)) {
- xe_res_first_dma(range->base.dma_addr, 0,
+ xe_res_first_dma(range->base.pages.dma_addr, 0,
range->base.itree.last + 1 - range->base.itree.start,
&curs);
xe_svm_range_debug(range, "BIND PREPARE - MIXED");
@@ -760,8 +764,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
if (!xe_vma_is_null(vma) && !range) {
if (xe_vma_is_userptr(vma))
- xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
- xe_vma_size(vma), &curs);
+ xe_res_first_dma(to_userptr_vma(vma)->userptr.pages.dma_addr, 0,
+ xe_vma_size(vma), &curs);
else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
xe_vma_size(vma), &curs);
@@ -914,7 +918,7 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
if (xe_vma_bo(vma))
xe_bo_assert_held(xe_vma_bo(vma));
else if (xe_vma_is_userptr(vma))
- lockdep_assert_held(&xe_vma_vm(vma)->userptr.notifier_lock);
+ lockdep_assert_held(&xe_vma_vm(vma)->svm.gpusvm.notifier_lock);
if (!(pt_mask & BIT(tile->id)))
return false;
@@ -1049,7 +1053,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma)
xe_pt_commit_prepare_locks_assert(vma);
if (xe_vma_is_userptr(vma))
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
}
static void xe_pt_commit(struct xe_vma *vma,
@@ -1376,6 +1380,7 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
pt_update_ops, rftree);
}
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
@@ -1406,7 +1411,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
struct xe_userptr_vma *uvma;
unsigned long notifier_seq;
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
if (!xe_vma_is_userptr(vma))
return 0;
@@ -1415,7 +1420,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
if (xe_pt_userptr_inject_eagain(uvma))
xe_vma_userptr_force_invalidate(uvma);
- notifier_seq = uvma->userptr.notifier_seq;
+ notifier_seq = uvma->userptr.pages.notifier_seq;
if (!mmu_interval_read_retry(&uvma->userptr.notifier,
notifier_seq))
@@ -1431,12 +1436,12 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
return 0;
}
-static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
- struct xe_vm_pgtable_update_ops *pt_update)
+static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op,
+ struct xe_vm_pgtable_update_ops *pt_update)
{
int err = 0;
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
@@ -1454,9 +1459,40 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
case DRM_GPUVA_OP_UNMAP:
break;
case DRM_GPUVA_OP_PREFETCH:
- err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va),
- pt_update);
+ if (xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))) {
+ struct xe_svm_range *range = op->map_range.range;
+ unsigned long i;
+
+ xe_assert(vm->xe,
+ xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
+ xa_for_each(&op->prefetch_range.range, i, range) {
+ xe_svm_range_debug(range, "PRE-COMMIT");
+
+ if (!xe_svm_range_pages_valid(range)) {
+ xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+ return -ENODATA;
+ }
+ }
+ } else {
+ err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), pt_update);
+ }
+ break;
+#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
+ case DRM_GPUVA_OP_DRIVER:
+ if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
+ struct xe_svm_range *range = op->map_range.range;
+
+ xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
+
+ xe_svm_range_debug(range, "PRE-COMMIT");
+
+ if (!xe_svm_range_pages_valid(range)) {
+ xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+ return -EAGAIN;
+ }
+ }
break;
+#endif
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
@@ -1464,7 +1500,7 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
return err;
}
-static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+static int xe_pt_svm_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
{
struct xe_vm *vm = pt_update->vops->vm;
struct xe_vma_ops *vops = pt_update->vops;
@@ -1477,69 +1513,18 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
if (err)
return err;
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
list_for_each_entry(op, &vops->list, link) {
- err = op_check_userptr(vm, op, pt_update_ops);
+ err = op_check_svm_userptr(vm, op, pt_update_ops);
if (err) {
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
break;
}
}
return err;
}
-
-#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
-static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
-{
- struct xe_vm *vm = pt_update->vops->vm;
- struct xe_vma_ops *vops = pt_update->vops;
- struct xe_vma_op *op;
- unsigned long i;
- int err;
-
- err = xe_pt_pre_commit(pt_update);
- if (err)
- return err;
-
- xe_svm_notifier_lock(vm);
-
- list_for_each_entry(op, &vops->list, link) {
- struct xe_svm_range *range = NULL;
-
- if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
- continue;
-
- if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
- xe_assert(vm->xe,
- xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
- xa_for_each(&op->prefetch_range.range, i, range) {
- xe_svm_range_debug(range, "PRE-COMMIT");
-
- if (!xe_svm_range_pages_valid(range)) {
- xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
- xe_svm_notifier_unlock(vm);
- return -ENODATA;
- }
- }
- } else {
- xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
- xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
- range = op->map_range.range;
-
- xe_svm_range_debug(range, "PRE-COMMIT");
-
- if (!xe_svm_range_pages_valid(range)) {
- xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
- xe_svm_notifier_unlock(vm);
- return -EAGAIN;
- }
- }
- }
-
- return 0;
-}
#endif
struct xe_pt_stage_unbind_walk {
@@ -1843,7 +1828,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
xe_vma_start(vma),
xe_vma_end(vma));
++pt_update_ops->current_op;
- pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+ pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma);
/*
* If rebind, we have to invalidate TLB on !LR vms to invalidate
@@ -1951,7 +1936,7 @@ static int unbind_op_prepare(struct xe_tile *tile,
xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma),
xe_vma_end(vma));
++pt_update_ops->current_op;
- pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+ pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma);
pt_update_ops->needs_invalidation = true;
xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries);
@@ -2199,7 +2184,7 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
vma->tile_invalidated & ~BIT(tile->id));
vma->tile_staged &= ~BIT(tile->id);
if (xe_vma_is_userptr(vma)) {
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
to_userptr_vma(vma)->userptr.initial_bind = true;
}
@@ -2235,7 +2220,7 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
if (!vma->tile_present) {
list_del_init(&vma->combined_links.rebind);
if (xe_vma_is_userptr(vma)) {
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
spin_lock(&vm->userptr.invalidated_lock);
list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
@@ -2338,20 +2323,14 @@ static const struct xe_migrate_pt_update_ops migrate_ops = {
.pre_commit = xe_pt_pre_commit,
};
-static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
- .populate = xe_vm_populate_pgtable,
- .clear = xe_migrate_clear_pgtable_callback,
- .pre_commit = xe_pt_userptr_pre_commit,
-};
-
-#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
-static const struct xe_migrate_pt_update_ops svm_migrate_ops = {
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops = {
.populate = xe_vm_populate_pgtable,
.clear = xe_migrate_clear_pgtable_callback,
- .pre_commit = xe_pt_svm_pre_commit,
+ .pre_commit = xe_pt_svm_userptr_pre_commit,
};
#else
-static const struct xe_migrate_pt_update_ops svm_migrate_ops;
+static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops;
#endif
static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q,
@@ -2389,9 +2368,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
int err = 0, i;
struct xe_migrate_pt_update update = {
.ops = pt_update_ops->needs_svm_lock ?
- &svm_migrate_ops :
- pt_update_ops->needs_userptr_lock ?
- &userptr_migrate_ops :
+ &svm_userptr_migrate_ops :
&migrate_ops,
.vops = vops,
.tile_id = tile->id,
@@ -2533,8 +2510,6 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
if (pt_update_ops->needs_svm_lock)
xe_svm_notifier_unlock(vm);
- if (pt_update_ops->needs_userptr_lock)
- up_read(&vm->userptr.notifier_lock);
xe_tlb_inval_job_put(mjob);
xe_tlb_inval_job_put(ijob);
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 5ecf003d513c..4daeebaab5a1 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -10,6 +10,7 @@
#include "xe_pt_types.h"
struct dma_fence;
+struct drm_exec;
struct xe_bo;
struct xe_device;
struct xe_exec_queue;
@@ -29,7 +30,7 @@ struct xe_vma_ops;
unsigned int xe_pt_shift(unsigned int level);
struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
- unsigned int level);
+ unsigned int level, struct drm_exec *exec);
void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
struct xe_pt *pt);
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 17cdd7c7e9f5..881f01e14db8 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -105,8 +105,6 @@ struct xe_vm_pgtable_update_ops {
u32 current_op;
/** @needs_svm_lock: Needs SVM lock */
bool needs_svm_lock;
- /** @needs_userptr_lock: Needs userptr lock */
- bool needs_userptr_lock;
/** @needs_invalidation: Needs invalidation */
bool needs_invalidation;
/**
diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
index 3d62008c99f1..bdbdbbf6a678 100644
--- a/drivers/gpu/drm/xe/xe_pxp.c
+++ b/drivers/gpu/drm/xe/xe_pxp.c
@@ -688,6 +688,7 @@ start:
return ret;
}
+ALLOW_ERROR_INJECTION(xe_pxp_exec_queue_add, ERRNO);
static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock)
{
diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c
index ca95f2a4d4ef..e60526e30030 100644
--- a/drivers/gpu/drm/xe/xe_pxp_submit.c
+++ b/drivers/gpu/drm/xe/xe_pxp_submit.c
@@ -54,8 +54,9 @@ static int allocate_vcs_execution_resources(struct xe_pxp *pxp)
* Each termination is 16 DWORDS, so 4K is enough to contain a
* termination for each sessions.
*/
- bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT,
+ false);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_queue;
@@ -87,7 +88,9 @@ static int allocate_gsc_client_resources(struct xe_gt *gt,
{
struct xe_tile *tile = gt_to_tile(gt);
struct xe_device *xe = tile_to_xe(tile);
+ struct xe_validation_ctx ctx;
struct xe_hw_engine *hwe;
+ struct drm_exec exec;
struct xe_vm *vm;
struct xe_bo *bo;
struct xe_exec_queue *q;
@@ -106,15 +109,26 @@ static int allocate_gsc_client_resources(struct xe_gt *gt,
return PTR_ERR(vm);
/* We allocate a single object for the batch and the in/out memory */
- xe_vm_lock(vm, false);
- bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC);
- xe_vm_unlock(vm);
- if (IS_ERR(bo)) {
- err = PTR_ERR(bo);
- goto vm_out;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags){}, err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ break;
+
+ bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_NEEDS_UC, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
}
+ if (err)
+ goto vm_out;
fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB);
if (IS_ERR(fence)) {
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 4dbe5732cb7f..e1b603aba61b 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -21,6 +21,7 @@
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
+#include "xe_gt_topology.h"
#include "xe_guc_hwconfig.h"
#include "xe_macros.h"
#include "xe_mmio.h"
@@ -477,7 +478,7 @@ static size_t calc_topo_query_size(struct xe_device *xe)
sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss);
/* L3bank mask may not be available for some GTs */
- if (!XE_GT_WA(gt, no_media_l3))
+ if (xe_gt_topology_report_l3(gt))
query_size += sizeof(struct drm_xe_query_topology_mask) +
sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask);
}
@@ -540,7 +541,7 @@ static int query_gt_topology(struct xe_device *xe,
* mask, then it's better to omit L3 from the query rather than
* reporting bogus or zeroed information to userspace.
*/
- if (!XE_GT_WA(gt, no_media_l3)) {
+ if (xe_gt_topology_report_l3(gt)) {
topo.type = DRM_XE_TOPO_L3_BANK;
err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
sizeof(gt->fuse_topo.l3_bank_mask));
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 47ea1521dc80..b5f430d59f80 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -370,3 +370,9 @@ bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt,
{
return xe_configfs_get_psmi_enabled(to_pci_dev(gt_to_xe(gt)->drm.dev));
}
+
+bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
+{
+ return xe_gt_has_discontiguous_dss_groups(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 7951fefdbe04..ac12ddf6cde6 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -480,4 +480,7 @@ bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt,
bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt,
const struct xe_hw_engine *hwe);
+bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index 87911fb4eea7..7d2d6de2aabf 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -160,19 +160,15 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size)
}
/**
- * xe_sriov_late_init() - SR-IOV late initialization functions.
+ * xe_sriov_init_late() - SR-IOV late initialization functions.
* @xe: the &xe_device to initialize
*
- * On VF this function will initialize code for CCS migration.
- *
* Return: 0 on success or a negative error code on failure.
*/
-int xe_sriov_late_init(struct xe_device *xe)
+int xe_sriov_init_late(struct xe_device *xe)
{
- int err = 0;
-
- if (IS_VF_CCS_INIT_NEEDED(xe))
- err = xe_sriov_vf_ccs_init(xe);
+ if (IS_SRIOV_VF(xe))
+ return xe_sriov_vf_init_late(xe);
- return err;
+ return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h
index 0e0c1abf2d14..6db45df55615 100644
--- a/drivers/gpu/drm/xe/xe_sriov.h
+++ b/drivers/gpu/drm/xe/xe_sriov.h
@@ -18,7 +18,7 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len);
void xe_sriov_probe_early(struct xe_device *xe);
void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p);
int xe_sriov_init(struct xe_device *xe);
-int xe_sriov_late_init(struct xe_device *xe);
+int xe_sriov_init_late(struct xe_device *xe);
static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe)
{
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 5de81f213d83..cdd9f8e78b2a 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -3,6 +3,7 @@
* Copyright © 2023-2024 Intel Corporation
*/
+#include <drm/drm_debugfs.h>
#include <drm/drm_managed.h>
#include "xe_assert.h"
@@ -10,6 +11,7 @@
#include "xe_gt.h"
#include "xe_gt_sriov_printk.h"
#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_submit.h"
#include "xe_irq.h"
@@ -18,6 +20,7 @@
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"
+#include "xe_sriov_vf_ccs.h"
#include "xe_tile_sriov_vf.h"
/**
@@ -127,16 +130,66 @@
* | | |
*/
-static bool vf_migration_supported(struct xe_device *xe)
+/**
+ * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is
+ * supported or not.
+ * @xe: the &xe_device to check
+ *
+ * Returns: true if VF migration is supported, false otherwise.
+ */
+bool xe_sriov_vf_migration_supported(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_VF(xe));
+ return xe->sriov.vf.migration.enabled;
+}
+
+static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list va_args;
+
+ xe_assert(xe, IS_SRIOV_VF(xe));
+
+ va_start(va_args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &va_args;
+ xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf);
+ va_end(va_args);
+
+ xe->sriov.vf.migration.enabled = false;
+}
+
+static void migration_worker_func(struct work_struct *w);
+
+static void vf_migration_init_early(struct xe_device *xe)
{
/*
* TODO: Add conditions to allow specific platforms, when they're
* supported at production quality.
*/
- return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
-}
+ if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+ return vf_disable_migration(xe,
+ "experimental feature not available on production builds");
+
+ if (GRAPHICS_VER(xe) < 20)
+ return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found",
+ GRAPHICS_VER(xe));
+
+ if (!IS_DGFX(xe)) {
+ struct xe_uc_fw_version guc_version;
+
+ xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version);
+ if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0))
+ return vf_disable_migration(xe,
+ "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
+ guc_version.major, guc_version.minor);
+ }
-static void migration_worker_func(struct work_struct *w);
+ INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+
+ xe->sriov.vf.migration.enabled = true;
+ xe_sriov_dbg(xe, "migration support enabled\n");
+}
/**
* xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
@@ -144,10 +197,7 @@ static void migration_worker_func(struct work_struct *w);
*/
void xe_sriov_vf_init_early(struct xe_device *xe)
{
- INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
-
- if (!vf_migration_supported(xe))
- xe_sriov_info(xe, "migration not supported by this module version\n");
+ vf_migration_init_early(xe);
}
/**
@@ -302,8 +352,8 @@ static void vf_post_migration_recovery(struct xe_device *xe)
xe_pm_runtime_get(xe);
vf_post_migration_shutdown(xe);
- if (!vf_migration_supported(xe)) {
- xe_sriov_err(xe, "migration not supported by this module version\n");
+ if (!xe_sriov_vf_migration_supported(xe)) {
+ xe_sriov_err(xe, "migration is not supported\n");
err = -ENOTRECOVERABLE;
goto fail;
}
@@ -378,3 +428,48 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
drm_info(&xe->drm, "VF migration recovery %s\n", started ?
"scheduled" : "already in progress");
}
+
+/**
+ * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions.
+ * @xe: the &xe_device to initialize
+ *
+ * This function initializes code for CCS migration.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vf_init_late(struct xe_device *xe)
+{
+ int err = 0;
+
+ if (xe_sriov_vf_migration_supported(xe))
+ err = xe_sriov_vf_ccs_init(xe);
+
+ return err;
+}
+
+static int sa_info_vf_ccs(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = m->private;
+ struct xe_device *xe = to_xe_device(node->minor->dev);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_sriov_vf_ccs_print(xe, &p);
+ return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+ { .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs },
+};
+
+/**
+ * xe_sriov_vf_debugfs_register - Register VF debugfs attributes.
+ * @xe: the &xe_device
+ * @root: the root &dentry
+ *
+ * Prepare debugfs attributes exposed by the VF.
+ */
+void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root)
+{
+ drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list),
+ root, xe->drm.primary);
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h
index 7b8622cff2b7..9e752105ec2a 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.h
@@ -6,9 +6,15 @@
#ifndef _XE_SRIOV_VF_H_
#define _XE_SRIOV_VF_H_
+#include <linux/types.h>
+
+struct dentry;
struct xe_device;
void xe_sriov_vf_init_early(struct xe_device *xe);
+int xe_sriov_vf_init_late(struct xe_device *xe);
void xe_sriov_vf_start_migration_recovery(struct xe_device *xe);
+bool xe_sriov_vf_migration_supported(struct xe_device *xe);
+void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root);
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index 4872e43eb440..8dec616c37c9 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -13,8 +13,10 @@
#include "xe_guc_submit.h"
#include "xe_lrc.h"
#include "xe_migrate.h"
+#include "xe_pm.h"
#include "xe_sa.h"
#include "xe_sriov_printk.h"
+#include "xe_sriov_vf.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_sriov_vf_ccs_types.h"
@@ -135,7 +137,7 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe)
return round_up(bb_pool_size * 2, SZ_1M);
}
-static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx)
+static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_sa_manager *sa_manager;
@@ -167,7 +169,7 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx)
return 0;
}
-static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx)
+static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
{
u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
@@ -184,9 +186,8 @@ static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx)
xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
}
-static int register_save_restore_context(struct xe_tile_vf_ccs *ctx)
+static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx)
{
- int err = -EINVAL;
int ctx_type;
switch (ctx->ctx_id) {
@@ -197,10 +198,10 @@ static int register_save_restore_context(struct xe_tile_vf_ccs *ctx)
ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE;
break;
default:
- return err;
+ return -EINVAL;
}
- xe_guc_register_exec_queue(ctx->mig_q, ctx_type);
+ xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type);
return 0;
}
@@ -215,16 +216,14 @@ static int register_save_restore_context(struct xe_tile_vf_ccs *ctx)
*/
int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
{
- struct xe_tile *tile = xe_device_get_root_tile(xe);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
- struct xe_tile_vf_ccs *ctx;
+ struct xe_sriov_vf_ccs_ctx *ctx;
int err;
- if (!IS_VF_CCS_READY(xe))
- return 0;
+ xe_assert(xe, IS_VF_CCS_READY(xe));
for_each_ccs_rw_ctx(ctx_id) {
- ctx = &tile->sriov.vf.ccs[ctx_id];
+ ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
err = register_save_restore_context(ctx);
if (err)
return err;
@@ -235,7 +234,7 @@ int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
static void xe_sriov_vf_ccs_fini(void *arg)
{
- struct xe_tile_vf_ccs *ctx = arg;
+ struct xe_sriov_vf_ccs_ctx *ctx = arg;
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
/*
@@ -259,17 +258,19 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
{
struct xe_tile *tile = xe_device_get_root_tile(xe);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
- struct xe_tile_vf_ccs *ctx;
+ struct xe_sriov_vf_ccs_ctx *ctx;
struct xe_exec_queue *q;
u32 flags;
int err;
xe_assert(xe, IS_SRIOV_VF(xe));
- xe_assert(xe, !IS_DGFX(xe));
- xe_assert(xe, xe_device_has_flat_ccs(xe));
+ xe_assert(xe, xe_sriov_vf_migration_supported(xe));
+
+ if (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe))
+ return 0;
for_each_ccs_rw_ctx(ctx_id) {
- ctx = &tile->sriov.vf.ccs[ctx_id];
+ ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
ctx->ctx_id = ctx_id;
flags = EXEC_QUEUE_FLAG_KERNEL |
@@ -324,13 +325,12 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
{
struct xe_device *xe = xe_bo_device(bo);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
- struct xe_tile_vf_ccs *ctx;
+ struct xe_sriov_vf_ccs_ctx *ctx;
struct xe_tile *tile;
struct xe_bb *bb;
int err = 0;
- if (!IS_VF_CCS_READY(xe))
- return 0;
+ xe_assert(xe, IS_VF_CCS_READY(xe));
tile = xe_device_get_root_tile(xe);
@@ -339,7 +339,7 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
/* bb should be NULL here. Assert if not NULL */
xe_assert(xe, !bb);
- ctx = &tile->sriov.vf.ccs[ctx_id];
+ ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id);
}
return err;
@@ -361,7 +361,9 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
struct xe_bb *bb;
- if (!IS_VF_CCS_READY(xe))
+ xe_assert(xe, IS_VF_CCS_READY(xe));
+
+ if (!xe_bo_has_valid_ccs_bb(bo))
return 0;
for_each_ccs_rw_ctx(ctx_id) {
@@ -375,3 +377,34 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
}
return 0;
}
+
+/**
+ * xe_sriov_vf_ccs_print - Print VF CCS details.
+ * @xe: the &xe_device
+ * @p: the &drm_printer
+ *
+ * This function is for VF use only.
+ */
+void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
+{
+ struct xe_sa_manager *bb_pool;
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+
+ if (!IS_VF_CCS_READY(xe))
+ return;
+
+ xe_pm_runtime_get(xe);
+
+ for_each_ccs_rw_ctx(ctx_id) {
+ bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
+ if (!bb_pool)
+ break;
+
+ drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
+ drm_printf(p, "-------------------------\n");
+ drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
+ drm_puts(p, "\n");
+ }
+
+ xe_pm_runtime_put(xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
index 1f1baf685fec..0745c0ff0228 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
@@ -6,6 +6,11 @@
#ifndef _XE_SRIOV_VF_CCS_H_
#define _XE_SRIOV_VF_CCS_H_
+#include "xe_device_types.h"
+#include "xe_sriov.h"
+#include "xe_sriov_vf_ccs_types.h"
+
+struct drm_printer;
struct xe_device;
struct xe_bo;
@@ -13,5 +18,17 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe);
int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo);
int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo);
int xe_sriov_vf_ccs_register_context(struct xe_device *xe);
+void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p);
+
+static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_VF(xe));
+ return xe->sriov.vf.ccs.initialized;
+}
+
+#define IS_VF_CCS_READY(xe) ({ \
+ struct xe_device *xe__ = (xe); \
+ IS_SRIOV_VF(xe__) && xe_sriov_vf_ccs_ready(xe__); \
+ })
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
index 93435a6f4cb6..22c499943d2a 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
@@ -6,48 +6,46 @@
#ifndef _XE_SRIOV_VF_CCS_TYPES_H_
#define _XE_SRIOV_VF_CCS_TYPES_H_
+#include <linux/types.h>
+
#define for_each_ccs_rw_ctx(id__) \
for ((id__) = 0; (id__) < XE_SRIOV_VF_CCS_CTX_COUNT; (id__)++)
-#define IS_VF_CCS_READY(xe) ({ \
- struct xe_device *___xe = (xe); \
- xe_assert(___xe, IS_SRIOV_VF(___xe)); \
- ___xe->sriov.vf.ccs.initialized; \
- })
-
-#define IS_VF_CCS_INIT_NEEDED(xe) ({\
- struct xe_device *___xe = (xe); \
- IS_SRIOV_VF(___xe) && !IS_DGFX(___xe) && \
- xe_device_has_flat_ccs(___xe) && GRAPHICS_VER(___xe) >= 20; \
- })
-
enum xe_sriov_vf_ccs_rw_ctxs {
XE_SRIOV_VF_CCS_READ_CTX,
XE_SRIOV_VF_CCS_WRITE_CTX,
XE_SRIOV_VF_CCS_CTX_COUNT
};
-#define IS_VF_CCS_BB_VALID(xe, bo) ({ \
- struct xe_device *___xe = (xe); \
- struct xe_bo *___bo = (bo); \
- IS_SRIOV_VF(___xe) && \
- ___bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] && \
- ___bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX]; \
- })
-
struct xe_migrate;
struct xe_sa_manager;
-struct xe_tile_vf_ccs {
- /** @id: Id to which context it belongs to */
+/**
+ * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
+ */
+struct xe_sriov_vf_ccs_ctx {
+ /** @ctx_id: Id of the context this data belongs to */
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+
/** @mig_q: exec queues used for migration */
struct xe_exec_queue *mig_q;
+ /** @mem: memory data */
struct {
- /** @ccs_bb_pool: Pool from which batch buffers are allocated. */
+ /** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
struct xe_sa_manager *ccs_bb_pool;
} mem;
};
+/**
+ * struct xe_sriov_vf_ccs - The VF CCS migration support data.
+ */
+struct xe_sriov_vf_ccs {
+ /** @contexts: CCS read and write contexts for VF. */
+ struct xe_sriov_vf_ccs_ctx contexts[XE_SRIOV_VF_CCS_CTX_COUNT];
+
+ /** @initialized: Initialization of VF CCS is completed or not. */
+ bool initialized;
+};
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
index 24a873c50c49..426cc5841958 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
@@ -9,6 +9,8 @@
#include <linux/types.h>
#include <linux/workqueue_types.h>
+#include "xe_sriov_vf_ccs_types.h"
+
/**
* struct xe_sriov_vf_relay_version - PF ABI version details.
*/
@@ -35,13 +37,15 @@ struct xe_device_vf {
struct work_struct worker;
/** @migration.gt_flags: Per-GT request flags for VF migration recovery */
unsigned long gt_flags;
+ /**
+ * @migration.enabled: flag indicating if migration support
+ * was enabled or not due to missing prerequisites
+ */
+ bool enabled;
} migration;
/** @ccs: VF CCS state data */
- struct {
- /** @ccs.initialized: Initilalization of VF CCS is completed or not */
- bool initialized;
- } ccs;
+ struct xe_sriov_vf_ccs ccs;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index 7999cc5262a5..1662bfddd4bc 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -289,19 +289,10 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe)
u32 data;
bool survivability_mode;
- if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
+ if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE)
return false;
survivability_mode = xe_configfs_get_survivability_mode(pdev);
-
- if (xe->info.platform < XE_BATTLEMAGE) {
- if (survivability_mode) {
- dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n");
- xe_configfs_clear_survivability_mode(pdev);
- }
- return false;
- }
-
/* Enable survivability mode if set via configfs */
if (survivability_mode)
return true;
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 76c6d74c1208..7f2f1f041f1d 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -6,6 +6,7 @@
#include <drm/drm_drv.h>
#include "xe_bo.h"
+#include "xe_exec_queue_types.h"
#include "xe_gt_stats.h"
#include "xe_migrate.h"
#include "xe_module.h"
@@ -25,9 +26,9 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range)
* memory.
*/
- struct drm_gpusvm_range_flags flags = {
+ struct drm_gpusvm_pages_flags flags = {
/* Pairs with WRITE_ONCE in drm_gpusvm.c */
- .__flags = READ_ONCE(range->base.flags.__flags),
+ .__flags = READ_ONCE(range->base.pages.flags.__flags),
};
return flags.has_devmem_pages;
@@ -49,15 +50,15 @@ static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
return gpusvm_to_vm(r->gpusvm);
}
-#define range_debug(r__, operaton__) \
+#define range_debug(r__, operation__) \
vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \
"%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
"start=0x%014lx, end=0x%014lx, size=%lu", \
- (operaton__), range_to_vm(&(r__)->base)->usm.asid, \
+ (operation__), range_to_vm(&(r__)->base)->usm.asid, \
(r__)->base.gpusvm, \
xe_svm_range_in_vram((r__)) ? 1 : 0, \
xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \
- (r__)->base.notifier_seq, \
+ (r__)->base.pages.notifier_seq, \
xe_svm_range_start((r__)), xe_svm_range_end((r__)), \
xe_svm_range_size((r__)))
@@ -112,6 +113,11 @@ xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
&vm->svm.garbage_collector.work);
}
+static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt)
+{
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1);
+}
+
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
const struct mmu_notifier_range *mmu_range,
@@ -128,7 +134,7 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
range_debug(range, "NOTIFIER");
/* Skip if already unmapped or if no binding exist */
- if (range->base.flags.unmapped || !range->tile_present)
+ if (range->base.pages.flags.unmapped || !range->tile_present)
return 0;
range_debug(range, "NOTIFIER - EXECUTE");
@@ -144,13 +150,19 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
*/
for_each_tile(tile, xe, id)
if (xe_pt_zap_ptes_range(tile, vm, range)) {
- tile_mask |= BIT(id);
/*
* WRITE_ONCE pairs with READ_ONCE in
* xe_vm_has_valid_gpu_mapping()
*/
WRITE_ONCE(range->tile_invalidated,
range->tile_invalidated | BIT(id));
+
+ if (!(tile_mask & BIT(id))) {
+ xe_svm_tlb_inval_count_stats_incr(tile->primary_gt);
+ if (tile->media_gt)
+ xe_svm_tlb_inval_count_stats_incr(tile->media_gt);
+ tile_mask |= BIT(id);
+ }
}
return tile_mask;
@@ -170,6 +182,24 @@ xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
mmu_range);
}
+static s64 xe_svm_stats_ktime_us_delta(ktime_t start)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) ?
+ ktime_us_delta(ktime_get(), start) : 0;
+}
+
+static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start)
+{
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start);
+
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta);
+}
+
+static ktime_t xe_svm_stats_ktime_get(void)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) ? ktime_get() : 0;
+}
+
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
struct drm_gpusvm_notifier *notifier,
const struct mmu_notifier_range *mmu_range)
@@ -177,8 +207,10 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
struct xe_vm *vm = gpusvm_to_vm(gpusvm);
struct xe_device *xe = vm->xe;
struct drm_gpusvm_range *r, *first;
+ struct xe_tile *tile;
+ ktime_t start = xe_svm_stats_ktime_get();
u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
- u8 tile_mask = 0;
+ u8 tile_mask = 0, id;
long err;
xe_svm_assert_in_notifier(vm);
@@ -231,6 +263,13 @@ range_notifier_event_end:
r = first;
drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
xe_svm_range_notifier_event_end(vm, r, mmu_range);
+ for_each_tile(tile, xe, id) {
+ if (tile_mask & BIT(id)) {
+ xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start);
+ if (tile->media_gt)
+ xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start);
+ }
+ }
}
static int __xe_svm_garbage_collector(struct xe_vm *vm,
@@ -308,8 +347,8 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
if (xe_vm_is_closed_or_banned(vm))
return -ENOENT;
- spin_lock(&vm->svm.garbage_collector.lock);
for (;;) {
+ spin_lock(&vm->svm.garbage_collector.lock);
range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
typeof(*range),
garbage_collector_link);
@@ -338,8 +377,6 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
else
return err;
}
-
- spin_lock(&vm->svm.garbage_collector.lock);
}
spin_unlock(&vm->svm.garbage_collector.lock);
@@ -384,11 +421,66 @@ enum xe_svm_copy_dir {
XE_SVM_COPY_TO_SRAM,
};
+static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt,
+ const enum xe_svm_copy_dir dir,
+ int kb)
+{
+ if (dir == XE_SVM_COPY_TO_VRAM)
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb);
+ else
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb);
+}
+
+static void xe_svm_copy_us_stats_incr(struct xe_gt *gt,
+ const enum xe_svm_copy_dir dir,
+ unsigned long npages,
+ ktime_t start)
+{
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start);
+
+ if (dir == XE_SVM_COPY_TO_VRAM) {
+ switch (npages) {
+ case 1:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
+ us_delta);
+ break;
+ case 16:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
+ us_delta);
+ break;
+ case 512:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
+ us_delta);
+ break;
+ }
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
+ us_delta);
+ } else {
+ switch (npages) {
+ case 1:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
+ us_delta);
+ break;
+ case 16:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
+ us_delta);
+ break;
+ case 512:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
+ us_delta);
+ break;
+ }
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US,
+ us_delta);
+ }
+}
+
static int xe_svm_copy(struct page **pages,
struct drm_pagemap_addr *pagemap_addr,
unsigned long npages, const enum xe_svm_copy_dir dir)
{
struct xe_vram_region *vr = NULL;
+ struct xe_gt *gt = NULL;
struct xe_device *xe;
struct dma_fence *fence = NULL;
unsigned long i;
@@ -396,6 +488,7 @@ static int xe_svm_copy(struct page **pages,
u64 vram_addr = XE_VRAM_ADDR_INVALID;
int err = 0, pos = 0;
bool sram = dir == XE_SVM_COPY_TO_SRAM;
+ ktime_t start = xe_svm_stats_ktime_get();
/*
* This flow is complex: it locates physically contiguous device pages,
@@ -422,6 +515,7 @@ static int xe_svm_copy(struct page **pages,
if (!vr && spage) {
vr = page_to_vr(spage);
+ gt = xe_migrate_exec_queue(vr->migrate)->gt;
xe = vr->xe;
}
XE_WARN_ON(spage && page_to_vr(spage) != vr);
@@ -461,6 +555,9 @@ static int xe_svm_copy(struct page **pages,
int incr = (match && last) ? 1 : 0;
if (vram_addr != XE_VRAM_ADDR_INVALID) {
+ xe_svm_copy_kb_stats_incr(gt, dir,
+ (i - pos + incr) *
+ (PAGE_SIZE / SZ_1K));
if (sram) {
vm_dbg(&xe->drm,
"COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
@@ -499,6 +596,8 @@ static int xe_svm_copy(struct page **pages,
/* Extra mismatched device page, copy it */
if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
+ xe_svm_copy_kb_stats_incr(gt, dir,
+ (PAGE_SIZE / SZ_1K));
if (sram) {
vm_dbg(&xe->drm,
"COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
@@ -532,6 +631,14 @@ err_out:
dma_fence_put(fence);
}
+ /*
+ * XXX: We can't derive the GT here (or anywhere in this function), but
+ * compute always uses the primary GT so accumulate stats on the likely
+ * GT of the fault.
+ */
+ if (gt)
+ xe_svm_copy_us_stats_incr(gt, dir, npages, start);
+
return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
@@ -630,22 +737,26 @@ int xe_svm_init(struct xe_vm *vm)
{
int err;
- spin_lock_init(&vm->svm.garbage_collector.lock);
- INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
- INIT_WORK(&vm->svm.garbage_collector.work,
- xe_svm_garbage_collector_work_func);
-
- err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
- current->mm, xe_svm_devm_owner(vm->xe), 0,
- vm->size, xe_modparam.svm_notifier_size * SZ_1M,
- &gpusvm_ops, fault_chunk_sizes,
- ARRAY_SIZE(fault_chunk_sizes));
- if (err)
- return err;
-
- drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
+ if (vm->flags & XE_VM_FLAG_FAULT_MODE) {
+ spin_lock_init(&vm->svm.garbage_collector.lock);
+ INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
+ INIT_WORK(&vm->svm.garbage_collector.work,
+ xe_svm_garbage_collector_work_func);
+
+ err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
+ current->mm, xe_svm_devm_owner(vm->xe), 0,
+ vm->size,
+ xe_modparam.svm_notifier_size * SZ_1M,
+ &gpusvm_ops, fault_chunk_sizes,
+ ARRAY_SIZE(fault_chunk_sizes));
+ drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
+ } else {
+ err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
+ &vm->xe->drm, NULL, NULL, 0, 0, 0, NULL,
+ NULL, 0);
+ }
- return 0;
+ return err;
}
/**
@@ -716,7 +827,7 @@ bool xe_svm_range_validate(struct xe_vm *vm,
xe_svm_notifier_lock(vm);
ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask &&
- (devmem_preferred == range->base.flags.has_devmem_pages);
+ (devmem_preferred == range->base.pages.flags.has_devmem_pages);
xe_svm_notifier_unlock(vm);
@@ -755,49 +866,48 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
struct xe_device *xe = vr->xe;
struct device *dev = xe->drm.dev;
struct drm_buddy_block *block;
+ struct xe_validation_ctx vctx;
struct list_head *blocks;
+ struct drm_exec exec;
struct xe_bo *bo;
- ktime_t time_end = 0;
- int err, idx;
+ int err = 0, idx;
if (!drm_dev_enter(&xe->drm, &idx))
return -ENODEV;
xe_pm_runtime_get(xe);
- retry:
- bo = xe_bo_create_locked(vr->xe, NULL, NULL, end - start,
- ttm_bo_type_device,
- (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
- XE_BO_FLAG_CPU_ADDR_MIRROR);
- if (IS_ERR(bo)) {
- err = PTR_ERR(bo);
- if (xe_vm_validate_should_retry(NULL, err, &time_end))
- goto retry;
- goto out_pm_put;
- }
-
- drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
- &dpagemap_devmem_ops, dpagemap, end - start);
-
- blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
- list_for_each_entry(block, blocks, link)
- block->private = vr;
+ xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
+ bo = xe_bo_create_locked(xe, NULL, NULL, end - start,
+ ttm_bo_type_device,
+ (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
+ XE_BO_FLAG_CPU_ADDR_MIRROR, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&vctx, &err);
+ break;
+ }
- xe_bo_get(bo);
+ drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
+ &dpagemap_devmem_ops, dpagemap, end - start);
- /* Ensure the device has a pm ref while there are device pages active. */
- xe_pm_runtime_get_noresume(xe);
- err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
- start, end, timeslice_ms,
- xe_svm_devm_owner(xe));
- if (err)
- xe_svm_devmem_release(&bo->devmem_allocation);
+ blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
+ list_for_each_entry(block, blocks, link)
+ block->private = vr;
- xe_bo_unlock(bo);
- xe_bo_put(bo);
+ xe_bo_get(bo);
-out_pm_put:
+ /* Ensure the device has a pm ref while there are device pages active. */
+ xe_pm_runtime_get_noresume(xe);
+ err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
+ start, end, timeslice_ms,
+ xe_svm_devm_owner(xe));
+ if (err)
+ xe_svm_devmem_release(&bo->devmem_allocation);
+ xe_bo_unlock(bo);
+ xe_bo_put(bo);
+ }
xe_pm_runtime_put(xe);
drm_dev_exit(idx);
@@ -827,17 +937,17 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm
struct xe_vm *vm = range_to_vm(&range->base);
u64 range_size = xe_svm_range_size(range);
- if (!range->base.flags.migrate_devmem || !preferred_region_is_vram)
+ if (!range->base.pages.flags.migrate_devmem || !preferred_region_is_vram)
return false;
xe_assert(vm->xe, IS_DGFX(vm->xe));
- if (preferred_region_is_vram && xe_svm_range_in_vram(range)) {
+ if (xe_svm_range_in_vram(range)) {
drm_info(&vm->xe->drm, "Range is already in VRAM\n");
return false;
}
- if (preferred_region_is_vram && range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
+ if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
return false;
}
@@ -845,27 +955,77 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm
return true;
}
+#define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \
+static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \
+ struct xe_svm_range *range) \
+{ \
+ switch (xe_svm_range_size(range)) { \
+ case SZ_4K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \
+ break; \
+ case SZ_64K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \
+ break; \
+ case SZ_2M: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \
+ break; \
+ } \
+} \
+
+DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT)
+DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT)
+DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE)
+
+#define DECL_SVM_RANGE_US_STATS(elem, stat) \
+static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \
+ struct xe_svm_range *range, \
+ ktime_t start) \
+{ \
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start); \
+\
+ switch (xe_svm_range_size(range)) { \
+ case SZ_4K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \
+ us_delta); \
+ break; \
+ case SZ_64K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \
+ us_delta); \
+ break; \
+ case SZ_2M: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \
+ us_delta); \
+ break; \
+ } \
+} \
+
+DECL_SVM_RANGE_US_STATS(migrate, MIGRATE)
+DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES)
+DECL_SVM_RANGE_US_STATS(bind, BIND)
+DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT)
+
static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
struct xe_gt *gt, u64 fault_addr,
bool need_vram)
{
+ int devmem_possible = IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
struct drm_gpusvm_ctx ctx = {
.read_only = xe_vma_read_only(vma),
- .devmem_possible = IS_DGFX(vm->xe) &&
- IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
- .check_pages_threshold = IS_DGFX(vm->xe) &&
- IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0,
- .devmem_only = need_vram && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
- .timeslice_ms = need_vram && IS_DGFX(vm->xe) &&
- IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
+ .devmem_possible = devmem_possible,
+ .check_pages_threshold = devmem_possible ? SZ_64K : 0,
+ .devmem_only = need_vram && devmem_possible,
+ .timeslice_ms = need_vram && devmem_possible ?
vm->xe->atomic_svm_timeslice_ms : 0,
};
+ struct xe_validation_ctx vctx;
+ struct drm_exec exec;
struct xe_svm_range *range;
struct dma_fence *fence;
struct drm_pagemap *dpagemap;
struct xe_tile *tile = gt_to_tile(gt);
int migrate_try_count = ctx.devmem_only ? 3 : 1;
- ktime_t end = 0;
+ ktime_t start = xe_svm_stats_ktime_get(), bind_start, get_pages_start;
int err;
lockdep_assert_held_write(&vm->lock);
@@ -884,23 +1044,34 @@ retry:
if (IS_ERR(range))
return PTR_ERR(range);
- if (ctx.devmem_only && !range->base.flags.migrate_devmem)
- return -EACCES;
+ xe_svm_range_fault_count_stats_incr(gt, range);
- if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
- return 0;
+ if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) {
+ err = -EACCES;
+ goto out;
+ }
+
+ if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) {
+ xe_svm_range_valid_fault_count_stats_incr(gt, range);
+ range_debug(range, "PAGE FAULT - VALID");
+ goto out;
+ }
range_debug(range, "PAGE FAULT");
dpagemap = xe_vma_resolve_pagemap(vma, tile);
if (--migrate_try_count >= 0 &&
xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
+ ktime_t migrate_start = xe_svm_stats_ktime_get();
+
/* TODO : For multi-device dpagemap will be used to find the
* remote tile and remote device. Will need to modify
* xe_svm_alloc_vram to use dpagemap for future multi-device
* support.
*/
+ xe_svm_range_migrate_count_stats_incr(gt, range);
err = xe_svm_alloc_vram(tile, range, &ctx);
+ xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
if (migrate_try_count || !ctx.devmem_only) {
@@ -917,6 +1088,8 @@ retry:
}
}
+ get_pages_start = xe_svm_stats_ktime_get();
+
range_debug(range, "GET PAGES");
err = xe_svm_range_get_pages(vm, range, &ctx);
/* Corner where CPU mappings have changed */
@@ -936,32 +1109,45 @@ retry:
}
if (err) {
range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
- goto err_out;
+ goto out;
}
+ xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start);
range_debug(range, "PAGE FAULT - BIND");
-retry_bind:
- xe_vm_lock(vm, false);
- fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
- if (IS_ERR(fence)) {
- xe_vm_unlock(vm);
- err = PTR_ERR(fence);
- if (err == -EAGAIN) {
- ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
- range_debug(range, "PAGE FAULT - RETRY BIND");
- goto retry;
+ bind_start = xe_svm_stats_ktime_get();
+ xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+
+ xe_vm_set_validation_exec(vm, &exec);
+ fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
+ xe_vm_set_validation_exec(vm, NULL);
+ if (IS_ERR(fence)) {
+ drm_exec_retry_on_contention(&exec);
+ err = PTR_ERR(fence);
+ xe_validation_retry_on_oom(&vctx, &err);
+ xe_svm_range_bind_us_stats_incr(gt, range, bind_start);
+ break;
}
- if (xe_vm_validate_should_retry(NULL, err, &end))
- goto retry_bind;
- goto err_out;
}
- xe_vm_unlock(vm);
+ if (err)
+ goto err_out;
dma_fence_wait(fence, false);
dma_fence_put(fence);
+ xe_svm_range_bind_us_stats_incr(gt, range, bind_start);
+
+out:
+ xe_svm_range_fault_us_stats_incr(gt, range, start);
+ return 0;
err_out:
+ if (err == -EAGAIN) {
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
+ range_debug(range, "PAGE FAULT - RETRY BIND");
+ goto retry;
+ }
return err;
}
@@ -1089,7 +1275,7 @@ struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)),
xe_vma_start(vma), xe_vma_end(vma), ctx);
if (IS_ERR(r))
- return ERR_PTR(PTR_ERR(r));
+ return ERR_CAST(r);
return to_xe_range(r);
}
@@ -1221,7 +1407,7 @@ int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
{
struct drm_pagemap *dpagemap;
- xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem);
+ xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem);
range_debug(range, "ALLOCATE VRAM");
dpagemap = tile_local_pagemap(tile);
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 9d6a8840a8b7..cef6ee7d6fe3 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -105,7 +105,7 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range)
{
lockdep_assert_held(&range->base.gpusvm->notifier_lock);
- return range->base.flags.has_dma_mapping;
+ return range->base.pages.flags.has_dma_mapping;
}
/**
@@ -155,19 +155,11 @@ static inline unsigned long xe_svm_range_size(struct xe_svm_range *range)
return drm_gpusvm_range_size(&range->base);
}
-#define xe_svm_assert_in_notifier(vm__) \
- lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
-
-#define xe_svm_notifier_lock(vm__) \
- drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
-
-#define xe_svm_notifier_unlock(vm__) \
- drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
-
void xe_svm_flush(struct xe_vm *vm);
#else
#include <linux/interval_tree.h>
+#include "xe_vm.h"
struct drm_pagemap_addr;
struct drm_gpusvm_ctx;
@@ -184,7 +176,9 @@ struct xe_vram_region;
struct xe_svm_range {
struct {
struct interval_tree_node itree;
- const struct drm_pagemap_addr *dma_addr;
+ struct {
+ const struct drm_pagemap_addr *dma_addr;
+ } pages;
} base;
u32 tile_present;
u32 tile_invalidated;
@@ -204,12 +198,21 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
static inline
int xe_svm_init(struct xe_vm *vm)
{
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+ return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", &vm->xe->drm,
+ NULL, NULL, 0, 0, 0, NULL, NULL, 0);
+#else
return 0;
+#endif
}
static inline
void xe_svm_fini(struct xe_vm *vm)
{
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+ xe_assert(vm->xe, xe_vm_is_closed(vm));
+ drm_gpusvm_fini(&vm->svm.gpusvm);
+#endif
}
static inline
@@ -326,19 +329,47 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
return NULL;
}
-#define xe_svm_assert_in_notifier(...) do {} while (0)
+static inline void xe_svm_flush(struct xe_vm *vm)
+{
+}
#define xe_svm_range_has_dma_mapping(...) false
+#endif /* CONFIG_DRM_XE_GPUSVM */
+
+#if IS_ENABLED(CONFIG_DRM_GPUSVM) /* Need to support userptr without XE_GPUSVM */
+#define xe_svm_assert_in_notifier(vm__) \
+ lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_assert_held_read(vm__) \
+ lockdep_assert_held_read(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_notifier_lock(vm__) \
+ drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
+
+#define xe_svm_notifier_lock_interruptible(vm__) \
+ down_read_interruptible(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_notifier_unlock(vm__) \
+ drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
+
+#else
+#define xe_svm_assert_in_notifier(...) do {} while (0)
+
+static inline void xe_svm_assert_held_read(struct xe_vm *vm)
+{
+}
static inline void xe_svm_notifier_lock(struct xe_vm *vm)
{
}
-static inline void xe_svm_notifier_unlock(struct xe_vm *vm)
+static inline int xe_svm_notifier_lock_interruptible(struct xe_vm *vm)
{
+ return 0;
}
-static inline void xe_svm_flush(struct xe_vm *vm)
+static inline void xe_svm_notifier_unlock(struct xe_vm *vm)
{
}
-#endif
+#endif /* CONFIG_DRM_GPUSVM */
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c
new file mode 100644
index 000000000000..5523874cba7b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+#include <drm/drm_debugfs.h>
+
+#include "xe_pm.h"
+#include "xe_sa.h"
+#include "xe_tile_debugfs.h"
+
+static struct xe_tile *node_to_tile(struct drm_info_node *node)
+{
+ return node->dent->d_parent->d_inode->i_private;
+}
+
+/**
+ * tile_debugfs_simple_show - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * This callback can be used in struct drm_info_list to describe debugfs
+ * files that are &xe_tile specific.
+ *
+ * It is assumed that those debugfs files will be created in a directory
+ * whose struct dentry d_inode->i_private points to the &xe_tile.
+ *
+ * /sys/kernel/debug/dri/0/
+ * ├── tile0/ # tile = dentry->d_inode->i_private
+ * │ │ ├── id # tile = dentry->d_parent->d_inode->i_private
+ *
+ * This function assumes that &m->private will be set to the &struct
+ * drm_info_node corresponding to the instance of the info on a given &struct
+ * drm_minor (see struct drm_info_list.show for details).
+ *
+ * This function also assumes that struct drm_info_list.data will point to the
+ * function code that will actually print a file content::
+ *
+ * int (*print)(struct xe_tile *, struct drm_printer *)
+ *
+ * Example::
+ *
+ * int tile_id(struct xe_tile *tile, struct drm_printer *p)
+ * {
+ * drm_printf(p, "%u\n", tile->id);
+ * return 0;
+ * }
+ *
+ * static const struct drm_info_list info[] = {
+ * { .name = "id", .show = tile_debugfs_simple_show, .data = tile_id },
+ * };
+ *
+ * dir = debugfs_create_dir("tile0", parent);
+ * dir->d_inode->i_private = tile;
+ * drm_debugfs_create_files(info, ARRAY_SIZE(info), dir, minor);
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static int tile_debugfs_simple_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct drm_info_node *node = m->private;
+ struct xe_tile *tile = node_to_tile(node);
+ int (*print)(struct xe_tile *, struct drm_printer *) = node->info_ent->data;
+
+ return print(tile, &p);
+}
+
+/**
+ * tile_debugfs_show_with_rpm - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * Similar to tile_debugfs_simple_show() but implicitly takes a RPM ref.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static int tile_debugfs_show_with_rpm(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = m->private;
+ struct xe_tile *tile = node_to_tile(node);
+ struct xe_device *xe = tile_to_xe(tile);
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = tile_debugfs_simple_show(m, data);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static int sa_info(struct xe_tile *tile, struct drm_printer *p)
+{
+ drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p,
+ xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool));
+
+ return 0;
+}
+
+/* only for debugfs files which can be safely used on the VF */
+static const struct drm_info_list vf_safe_debugfs_list[] = {
+ { "sa_info", .show = tile_debugfs_show_with_rpm, .data = sa_info },
+};
+
+/**
+ * xe_tile_debugfs_register - Register tile's debugfs attributes
+ * @tile: the &xe_tile to register
+ *
+ * Create a debugfs sub-directory with a name that includes the tile ID and
+ * then create a set of debugfs files (attributes) specific to this tile.
+ */
+void xe_tile_debugfs_register(struct xe_tile *tile)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct drm_minor *minor = xe->drm.primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[8];
+
+ snprintf(name, sizeof(name), "tile%u", tile->id);
+ tile->debugfs = debugfs_create_dir(name, root);
+ if (IS_ERR(tile->debugfs))
+ return;
+
+ /*
+ * Store the xe_tile pointer as private data of the tile/ directory
+ * node so other tile specific attributes under that directory may
+ * refer to it by looking at its parent node private data.
+ */
+ tile->debugfs->d_inode->i_private = tile;
+
+ drm_debugfs_create_files(vf_safe_debugfs_list,
+ ARRAY_SIZE(vf_safe_debugfs_list),
+ tile->debugfs, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.h b/drivers/gpu/drm/xe/xe_tile_debugfs.h
new file mode 100644
index 000000000000..0e5f724de37f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TILE_DEBUGFS_H_
+#define _XE_TILE_DEBUGFS_H_
+
+struct xe_tile;
+
+void xe_tile_debugfs_register(struct xe_tile *tile);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_printk.h b/drivers/gpu/drm/xe/xe_tile_printk.h
new file mode 100644
index 000000000000..63640a42685d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_printk.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TILE_PRINTK_H_
+#define _XE_TILE_PRINTK_H_
+
+#include "xe_printk.h"
+
+/* Expands to a "Tile<id>: " prefixed format string followed by its arguments. */
+#define __XE_TILE_PRINTK_FMT(_tile, _fmt, _args...)	"Tile%u: " _fmt, (_tile)->id, ##_args
+
+/*
+ * Tile flavoured wrappers around xe_printk(): the message is emitted through
+ * the tile's ->xe device and carries the tile prefix.
+ */
+#define xe_tile_printk(_tile, _level, _fmt, ...) \
+	xe_printk((_tile)->xe, _level, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__))
+
+#define xe_tile_err(_tile, _fmt, ...) \
+	xe_tile_printk((_tile), err, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_err_once(_tile, _fmt, ...) \
+	xe_tile_printk((_tile), err_once, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_err_ratelimited(_tile, _fmt, ...) \
+	xe_tile_printk((_tile), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_warn(_tile, _fmt, ...) \
+	xe_tile_printk((_tile), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_notice(_tile, _fmt, ...) \
+	xe_tile_printk((_tile), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_info(_tile, _fmt, ...) \
+	xe_tile_printk((_tile), info, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_dbg(_tile, _fmt, ...) \
+	xe_tile_printk((_tile), dbg, _fmt, ##__VA_ARGS__)
+
+/*
+ * Tile flavoured wrappers around the xe_WARN*() family; @_type selects the
+ * xe_WARN suffix (empty or _ONCE).
+ */
+#define xe_tile_WARN_type(_tile, _type, _condition, _fmt, ...) \
+	xe_WARN##_type((_tile)->xe, _condition, _fmt, ## __VA_ARGS__)
+
+#define xe_tile_WARN(_tile, _condition, _fmt, ...) \
+	xe_tile_WARN_type((_tile),, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__))
+
+#define xe_tile_WARN_ONCE(_tile, _condition, _fmt, ...) \
+	xe_tile_WARN_type((_tile), _ONCE, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__))
+
+/* The WARN_ON variants print the stringified condition as the message. */
+#define xe_tile_WARN_ON(_tile, _condition) \
+	xe_tile_WARN((_tile), _condition, "%s(%s)", "WARN_ON", __stringify(_condition))
+
+#define xe_tile_WARN_ON_ONCE(_tile, _condition) \
+	xe_tile_WARN_ONCE((_tile), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition))
+
+/* &drm_printer print callback that forwards the message to xe_tile_err(). */
+static inline void __xe_tile_printfn_err(struct drm_printer *p, struct va_format *vaf)
+{
+	struct xe_tile *target = p->arg;
+
+	xe_tile_err(target, "%pV", vaf);
+}
+
+/* &drm_printer print callback that forwards the message to xe_tile_info(). */
+static inline void __xe_tile_printfn_info(struct drm_printer *p, struct va_format *vaf)
+{
+	struct xe_tile *target = p->arg;
+
+	xe_tile_info(target, "%pV", vaf);
+}
+
+static inline void __xe_tile_printfn_dbg(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_tile *tile = p->arg;
+ struct drm_printer dbg;
+
+ /*
+ * The original xe_tile_dbg() callsite annotations are useless here,
+ * redirect to the tweaked xe_dbg_printer() instead. The origin stored
+ * in @p is copied over to the temporary printer, and the message still
+ * gets the tile prefix.
+ */
+ dbg = xe_dbg_printer(tile->xe);
+ dbg.origin = p->origin;
+
+ drm_printf(&dbg, __XE_TILE_PRINTK_FMT(tile, "%pV", vaf));
+}
+
+/**
+ * xe_tile_err_printer - Build a &drm_printer backed by xe_tile_err()
+ * @tile: the &xe_tile pointer the printer passes to xe_tile_err()
+ *
+ * Return: The &drm_printer object, returned by value.
+ */
+static inline struct drm_printer xe_tile_err_printer(struct xe_tile *tile)
+{
+	return (struct drm_printer) {
+		.printfn = __xe_tile_printfn_err,
+		.arg = tile,
+	};
+}
+
+/**
+ * xe_tile_info_printer - Build a &drm_printer backed by xe_tile_info()
+ * @tile: the &xe_tile pointer the printer passes to xe_tile_info()
+ *
+ * Return: The &drm_printer object, returned by value.
+ */
+static inline struct drm_printer xe_tile_info_printer(struct xe_tile *tile)
+{
+	return (struct drm_printer) {
+		.printfn = __xe_tile_printfn_info,
+		.arg = tile,
+	};
+}
+
+/**
+ * xe_tile_dbg_printer - Build a &drm_printer that prints like xe_tile_dbg()
+ * @tile: the &xe_tile pointer the printer uses for its messages
+ *
+ * The printer records _THIS_IP_ as its origin.
+ *
+ * Return: The &drm_printer object, returned by value.
+ */
+static inline struct drm_printer xe_tile_dbg_printer(struct xe_tile *tile)
+{
+	return (struct drm_printer) {
+		.printfn = __xe_tile_printfn_dbg,
+		.arg = tile,
+		.origin = (const void *)_THIS_IP_,
+	};
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c
index e6e97b5a7b5c..918a59e686ea 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.c
@@ -10,11 +10,10 @@
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
+#include "xe_gt_stats.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_tlb_inval.h"
-#include "xe_gt_stats.h"
-#include "xe_tlb_inval.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_tlb_inval.h"
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 9bbdde604923..622b76078567 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -115,8 +115,8 @@ struct fw_blobs_by_type {
#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
- fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 47, 0)) \
- fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \
+ fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 49, 4)) \
+ fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 49, 4)) \
fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \
fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \
fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \
@@ -328,7 +328,7 @@ static void uc_fw_fini(struct drm_device *drm, void *arg)
xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
}
-static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
+static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_guc_info *guc_info)
{
struct xe_gt *gt = uc_fw_to_gt(uc_fw);
struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
@@ -343,11 +343,12 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
return -EINVAL;
}
- compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version);
- compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version);
- compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version);
+ compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, guc_info->submission_version);
+ compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, guc_info->submission_version);
+ compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, guc_info->submission_version);
- uc_fw->private_data_size = css->private_data_size;
+ uc_fw->build_type = FIELD_GET(CSS_UKERNEL_INFO_BUILDTYPE, guc_info->ukernel_info);
+ uc_fw->private_data_size = guc_info->private_data_size;
return 0;
}
@@ -416,8 +417,8 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
css = (struct uc_css_header *)fw_data;
/* Check integrity of size values inside CSS header */
- size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
- css->exponent_size_dw) * sizeof(u32);
+ size = (css->header_size_dw - css->rsa_info.key_size_dw - css->rsa_info.modulus_size_dw -
+ css->rsa_info.exponent_size_dw) * sizeof(u32);
if (unlikely(size != sizeof(struct uc_css_header))) {
drm_warn(&xe->drm,
"%s firmware %s: unexpected header size: %zu != %zu\n",
@@ -430,7 +431,7 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
/* now RSA */
- uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+ uc_fw->rsa_size = css->rsa_info.key_size_dw * sizeof(u32);
/* At least, it should have header, uCode and RSA. Size of all three. */
size = sizeof(struct uc_css_header) + uc_fw->ucode_size +
@@ -443,12 +444,12 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
}
/* Get version numbers from the CSS header */
- release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version);
- release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version);
- release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version);
+ release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->guc_info.sw_version);
+ release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->guc_info.sw_version);
+ release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->guc_info.sw_version);
if (uc_fw->type == XE_UC_FW_TYPE_GUC)
- return guc_read_css_info(uc_fw, css);
+ return guc_read_css_info(uc_fw, &css->guc_info);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
index 87ade41209d0..3c9a63d13032 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -44,6 +44,39 @@
* in fw. So driver will load a truncated firmware in this case.
*/
+struct uc_css_rsa_info {
+ u32 key_size_dw; /* in dwords; parse_css_header() derives rsa_size from it */
+ u32 modulus_size_dw; /* in dwords; subtracted from header_size_dw in the size check */
+ u32 exponent_size_dw; /* in dwords; subtracted from header_size_dw in the size check */
+} __packed;
+
+/*
+ * GuC specific part of the CSS header. It is 22 dwords long and is overlaid
+ * on the reserved2[22] area of struct uc_css_header.
+ *
+ * Masks that reach bit 31 are built from unsigned constants, so the shift
+ * never overflows a signed int.
+ */
+struct uc_css_guc_info {
+	u32 time;
+#define CSS_TIME_HOUR				(0xFF << 0)
+#define CSS_TIME_MIN				(0xFF << 8)
+#define CSS_TIME_SEC				(0xFFFFU << 16)
+	u32 reserved0[5];
+	u32 sw_version;
+#define CSS_SW_VERSION_UC_MAJOR			(0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR			(0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH			(0xFF << 0)
+	u32 submission_version;	/* decoded with the CSS_SW_VERSION_UC_* masks */
+	u32 reserved1[11];
+	u32 header_info;
+#define CSS_HEADER_INFO_SVN			(0xFF)
+#define CSS_HEADER_INFO_COPY_VALID		(0x1U << 31)
+	u32 private_data_size;
+	u32 ukernel_info;
+#define CSS_UKERNEL_INFO_DEVICEID		(0xFFFFU << 16)
+#define CSS_UKERNEL_INFO_PRODKEY		(0xFF << 8)
+#define CSS_UKERNEL_INFO_BUILDTYPE		(0x3 << 2)
+#define CSS_UKERNEL_INFO_BUILDTYPE_PROD		0
+#define CSS_UKERNEL_INFO_BUILDTYPE_PREPROD	1
+#define CSS_UKERNEL_INFO_BUILDTYPE_DEBUG	2
+#define CSS_UKERNEL_INFO_ENCSTATUS		(0x1 << 1)
+#define CSS_UKERNEL_INFO_COPY_VALID		(0x1 << 0)
+} __packed;
+
struct uc_css_header {
u32 module_type;
/*
@@ -52,36 +85,21 @@ struct uc_css_header {
*/
u32 header_size_dw;
u32 header_version;
- u32 module_id;
+ u32 reserved0;
u32 module_vendor;
u32 date;
-#define CSS_DATE_DAY (0xFF << 0)
-#define CSS_DATE_MONTH (0xFF << 8)
-#define CSS_DATE_YEAR (0xFFFF << 16)
+#define CSS_DATE_DAY (0xFF << 0)
+#define CSS_DATE_MONTH (0xFF << 8)
+#define CSS_DATE_YEAR (0xFFFF << 16)
u32 size_dw; /* uCode plus header_size_dw */
- u32 key_size_dw;
- u32 modulus_size_dw;
- u32 exponent_size_dw;
- u32 time;
-#define CSS_TIME_HOUR (0xFF << 0)
-#define CSS_DATE_MIN (0xFF << 8)
-#define CSS_DATE_SEC (0xFFFF << 16)
- char username[8];
- char buildnumber[12];
- u32 sw_version;
-#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
-#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
-#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
union {
- u32 submission_version; /* only applies to GuC */
- u32 reserved2;
+ u32 reserved1[3];
+ struct uc_css_rsa_info rsa_info;
};
- u32 reserved0[12];
union {
- u32 private_data_size; /* only applies to GuC */
- u32 reserved1;
+ u32 reserved2[22];
+ struct uc_css_guc_info guc_info;
};
- u32 header_info;
} __packed;
static_assert(sizeof(struct uc_css_header) == 128);
@@ -318,4 +336,70 @@ struct gsc_manifest_header {
u32 exponent_size; /* in dwords */
} __packed;
+/**
+ * DOC: Late binding Firmware Layout
+ *
+ * The Late binding binary starts with the FPT header, which contains the
+ * locations of the various partitions of the binary. Here we're interested in
+ * finding the manifest version. To find the manifest version, we need to
+ * locate the CPD header; one of the entries in the CPD header points to the
+ * manifest header, which contains the version.
+ *
+ * +================================================+
+ * | FPT Header |
+ * +================================================+
+ * | FPT entries[] |
+ * | entry1 |
+ * | ... |
+ * | entryX |
+ * | "LTES" |
+ * | ... |
+ * | offset >-----------------------------|------o
+ * +================================================+ |
+ * |
+ * +================================================+ |
+ * | CPD Header |<-----o
+ * +================================================+
+ * | CPD entries[] |
+ * | entry1 |
+ * | ... |
+ * | entryX |
+ * | "LTES.man" |
+ * | ... |
+ * | offset >----------------------------|------o
+ * +================================================+ |
+ * |
+ * +================================================+ |
+ * | Manifest Header |<-----o
+ * | ... |
+ * | FW version |
+ * | ... |
+ * +================================================+
+ */
+
+/*
+ * FPT Headers
+ *
+ * struct csc_fpt_header describes the FPT header and struct csc_fpt_entry one
+ * element of the FPT entries[] table shown in the layout above.
+ */
+struct csc_fpt_header {
+ u32 header_marker;
+#define CSC_FPT_HEADER_MARKER 0x54504624 /* ASCII "$FPT" when read as little-endian bytes */
+ u32 num_of_entries;
+ u8 header_version;
+ u8 entry_version;
+ u8 header_length; /* in bytes */
+ u8 flags;
+ u16 ticks_to_add;
+ u16 tokens_to_add;
+ u32 uma_size;
+ u32 crc32;
+ struct gsc_version fitc_version;
+} __packed;
+
+struct csc_fpt_entry {
+ u8 name[4]; /* partition name, e.g. "LTES" in the layout above */
+ u32 reserved1;
+ u32 offset; /* offset from beginning of CSE region */
+ u32 length; /* partition length in bytes */
+ u32 reserved2[3];
+ u32 partition_flags;
+} __packed;
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index 914026015019..77a1dcf8b4ed 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -147,6 +147,9 @@ struct xe_uc_fw {
/** @private_data_size: size of private data found in uC css header */
u32 private_data_size;
+
+ /** @build_type: Firmware build type (see CSS_UKERNEL_INFO_BUILDTYPE for definitions) */
+ u32 build_type;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
new file mode 100644
index 000000000000..91d09af71ced
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_userptr.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_userptr.h"
+
+#include <linux/mm.h>
+
+#include "xe_trace_bo.h"
+
+/**
+ * xe_vma_userptr_check_repin() - Advisory check for repin needed
+ * @uvma: The userptr vma
+ *
+ * Check whether the userptr vma has been invalidated since the last
+ * successful repin. The check is advisory only, and the function can be
+ * called without the vm->svm.gpusvm.notifier_lock held. There is no guarantee
+ * that the vma userptr remains valid after a lockless check, so the call
+ * typically needs to be followed by a proper check under the notifier_lock.
+ *
+ * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ */
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
+{
+	struct xe_userptr *userptr = &uvma->userptr;
+
+	if (mmu_interval_check_retry(&userptr->notifier,
+				     userptr->pages.notifier_seq))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/**
+ * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * Checks whether the VM has userptrs on its repin or invalidated list.
+ * The svm.gpusvm.notifier_lock must be held in read mode, and the function
+ * provides a release-type barrier on that lock after checking.
+ *
+ * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
+{
+	lockdep_assert_held_read(&vm->svm.gpusvm.notifier_lock);
+
+	if (!list_empty(&vm->userptr.repin_list))
+		return -EAGAIN;
+
+	if (!list_empty(&vm->userptr.invalidated))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/**
+ * xe_vma_userptr_pin_pages() - Get the gpusvm pages backing a userptr vma
+ * @uvma: The userptr vma
+ *
+ * Must be called with vm->lock held. A vma that is already flagged
+ * XE_VMA_DESTROYED is skipped.
+ *
+ * Return: 0 if the vma is destroyed, otherwise the result of
+ * drm_gpusvm_get_pages().
+ */
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
+{
+	struct xe_userptr *userptr = &uvma->userptr;
+	struct xe_vma *vma = &uvma->vma;
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_device *xe = vm->xe;
+	struct drm_gpusvm_ctx gpusvm_ctx = {
+		.read_only = xe_vma_read_only(vma),
+	};
+
+	lockdep_assert_held(&vm->lock);
+	xe_assert(xe, xe_vma_is_userptr(vma));
+
+	if (vma->gpuva.flags & XE_VMA_DESTROYED)
+		return 0;
+
+	return drm_gpusvm_get_pages(&vm->svm.gpusvm, &userptr->pages,
+				    userptr->notifier.mm,
+				    &userptr->notifier,
+				    xe_vma_userptr(vma),
+				    xe_vma_userptr(vma) + xe_vma_size(vma),
+				    &gpusvm_ctx);
+}
+
+static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
+{
+ struct xe_userptr *userptr = &uvma->userptr;
+ struct xe_vma *vma = &uvma->vma;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ struct drm_gpusvm_ctx ctx = {
+ .in_notifier = true,
+ .read_only = xe_vma_read_only(vma),
+ };
+ long err;
+
+ /*
+ * Tell exec and rebind worker they need to repin and rebind this
+ * userptr. Skipped in fault mode and for vmas that are already
+ * destroyed.
+ */
+ if (!xe_vm_in_fault_mode(vm) &&
+ !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_move_tail(&userptr->invalidate_link,
+ &vm->userptr.invalidated);
+ spin_unlock(&vm->userptr.invalidated_lock);
+ }
+
+ /*
+ * Preempt fences turn into schedule disables, pipeline these.
+ * Note that even in fault mode, we need to wait for binds and
+ * unbinds to complete, and those are attached as BOOKKEEP fences
+ * to the vm.
+ */
+ dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_for_each_fence_unlocked(&cursor, fence)
+ dma_fence_enable_sw_signaling(fence);
+ dma_resv_iter_end(&cursor);
+
+ err = dma_resv_wait_timeout(xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ XE_WARN_ON(err <= 0); /* uninterruptible wait without a real timeout: <= 0 is unexpected */
+
+ if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
+ err = xe_vm_invalidate_vma(vma);
+ XE_WARN_ON(err);
+ }
+
+ drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
+ xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
+}
+
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
+ struct xe_vma *vma = &uvma->vma;
+ struct xe_vm *vm = xe_vma_vm(vma);
+
+ xe_assert(vm->xe, xe_vma_is_userptr(vma));
+ trace_xe_vma_userptr_invalidate(vma);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false; /* not allowed to block: bail out before taking any lock */
+
+ vm_dbg(&xe_vma_vm(vma)->xe->drm,
+ "NOTIFIER: addr=0x%016llx, range=0x%016llx",
+ xe_vma_start(vma), xe_vma_size(vma));
+
+ down_write(&vm->svm.gpusvm.notifier_lock);
+ mmu_interval_set_seq(mni, cur_seq); /* new sequence is recorded under the notifier lock */
+
+ __vma_userptr_invalidate(vm, uvma);
+ up_write(&vm->svm.gpusvm.notifier_lock);
+ trace_xe_vma_userptr_invalidate_complete(vma);
+
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { /* registered by xe_userptr_setup() */
+ .invalidate = vma_userptr_invalidate,
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+/**
+ * xe_vma_userptr_force_invalidate() - force invalidate a userptr
+ * @uvma: The userptr vma to invalidate
+ *
+ * Perform a forced userptr invalidation for testing purposes.
+ *
+ * The caller must hold vm->lock and vm->svm.gpusvm.notifier_lock (both are
+ * asserted with lockdep) and must satisfy xe_vm_assert_held(). Only built
+ * with CONFIG_DRM_XE_USERPTR_INVAL_INJECT.
+ */
+void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
+{
+ struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+
+ /* Protect against concurrent userptr pinning */
+ lockdep_assert_held(&vm->lock);
+ /* Protect against concurrent notifiers */
+ lockdep_assert_held(&vm->svm.gpusvm.notifier_lock);
+ /*
+ * Protect against concurrent instances of this function and
+ * the critical exec sections
+ */
+ xe_vm_assert_held(vm);
+
+ if (!mmu_interval_read_retry(&uvma->userptr.notifier,
+ uvma->userptr.pages.notifier_seq))
+ uvma->userptr.pages.notifier_seq -= 2; /* NOTE(review): presumably makes the stored seq stale so later retry checks fail - confirm */
+ __vma_userptr_invalidate(vm, uvma);
+}
+#endif
+
+int xe_vm_userptr_pin(struct xe_vm *vm)
+{
+ struct xe_userptr_vma *uvma, *next;
+ int err = 0;
+
+ xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
+ lockdep_assert_held_write(&vm->lock);
+
+ /* Collect invalidated userptrs onto the repin list */
+ spin_lock(&vm->userptr.invalidated_lock);
+ xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
+ list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
+ userptr.invalidate_link) {
+ list_del_init(&uvma->userptr.invalidate_link);
+ list_add_tail(&uvma->userptr.repin_link,
+ &vm->userptr.repin_list);
+ }
+ spin_unlock(&vm->userptr.invalidated_lock);
+
+ /* Pin and move to bind list */
+ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+ userptr.repin_link) {
+ err = xe_vma_userptr_pin_pages(uvma);
+ if (err == -EFAULT) {
+ list_del_init(&uvma->userptr.repin_link);
+ /*
+ * We might have already done the pin once, but
+ * then had to retry before the re-bind happened, due
+ * to some other condition in the caller, but in the
+ * meantime the userptr got dinged by the notifier such
+ * that we need to revalidate here, but this time we hit
+ * the EFAULT. In such a case make sure we remove
+ * ourselves from the rebind list to avoid going down in
+ * flames.
+ */
+ if (!list_empty(&uvma->vma.combined_links.rebind))
+ list_del_init(&uvma->vma.combined_links.rebind);
+
+ /* Wait for pending binds */
+ xe_vm_lock(vm, false);
+ dma_resv_wait_timeout(xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+
+ down_read(&vm->svm.gpusvm.notifier_lock);
+ err = xe_vm_invalidate_vma(&uvma->vma);
+ up_read(&vm->svm.gpusvm.notifier_lock);
+ xe_vm_unlock(vm);
+ if (err)
+ break;
+ } else {
+ if (err)
+ break;
+
+ list_del_init(&uvma->userptr.repin_link);
+ list_move_tail(&uvma->vma.combined_links.rebind,
+ &vm->rebind_list);
+ }
+ }
+
+ /* On error, hand whatever is left on the repin list back to the invalidated list */
+ if (err) {
+ down_write(&vm->svm.gpusvm.notifier_lock);
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+ userptr.repin_link) {
+ list_del_init(&uvma->userptr.repin_link);
+ list_move_tail(&uvma->userptr.invalidate_link,
+ &vm->userptr.invalidated);
+ }
+ spin_unlock(&vm->userptr.invalidated_lock);
+ up_write(&vm->svm.gpusvm.notifier_lock);
+ }
+ return err;
+}
+
+/**
+ * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * Advisory check, done without taking any lock, of whether the VM's repin or
+ * invalidated list holds any userptr.
+ *
+ * Return: 0 if there are no indications of userptrs needing repinning,
+ * -EAGAIN if there are.
+ */
+int xe_vm_userptr_check_repin(struct xe_vm *vm)
+{
+	if (!list_empty_careful(&vm->userptr.repin_list))
+		return -EAGAIN;
+
+	if (!list_empty_careful(&vm->userptr.invalidated))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/**
+ * xe_userptr_setup() - Initialize the userptr state of a userptr vma
+ * @uvma: The userptr vma
+ * @start: Start passed to mmu_interval_notifier_insert()
+ * @range: Length passed to mmu_interval_notifier_insert()
+ *
+ * Initializes the list links and registers the invalidation notifier on
+ * current->mm. On success the pages' notifier_seq is set to LONG_MAX.
+ *
+ * Return: 0 on success, or the error from mmu_interval_notifier_insert().
+ */
+int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start,
+		     unsigned long range)
+{
+	struct xe_userptr *userptr = &uvma->userptr;
+	int err;
+
+	INIT_LIST_HEAD(&userptr->invalidate_link);
+	INIT_LIST_HEAD(&userptr->repin_link);
+
+	err = mmu_interval_notifier_insert(&userptr->notifier, current->mm,
+					   start, range,
+					   &vma_userptr_notifier_ops);
+	if (!err)
+		userptr->pages.notifier_seq = LONG_MAX;
+
+	return err;
+}
+
+/**
+ * xe_userptr_remove() - Free the pages and remove the notifier of a userptr vma
+ * @uvma: The userptr vma
+ */
+void xe_userptr_remove(struct xe_userptr_vma *uvma)
+{
+	struct xe_userptr *userptr = &uvma->userptr;
+	struct xe_vma *vma = &uvma->vma;
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	drm_gpusvm_free_pages(&vm->svm.gpusvm, &userptr->pages,
+			      xe_vma_size(vma) >> PAGE_SHIFT);
+
+	/*
+	 * Userptr pages are not pinned, so the notifier must stay in place
+	 * until we're sure the GPU no longer accesses them.
+	 */
+	mmu_interval_notifier_remove(&userptr->notifier);
+}
+
+/**
+ * xe_userptr_destroy() - Unlink a userptr vma from the VM's invalidated list
+ * @uvma: The userptr vma
+ *
+ * The vma is expected to be off the repin list already (asserted).
+ */
+void xe_userptr_destroy(struct xe_userptr_vma *uvma)
+{
+	struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+	spinlock_t *lock = &vm->userptr.invalidated_lock;
+
+	spin_lock(lock);
+	xe_assert(vm->xe, list_empty(&uvma->userptr.repin_link));
+	list_del(&uvma->userptr.invalidate_link);
+	spin_unlock(lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h
new file mode 100644
index 000000000000..ef801234991e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_userptr.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_USERPTR_H_
+#define _XE_USERPTR_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock.h>
+
+#include <drm/drm_gpusvm.h>
+
+struct xe_vm;
+struct xe_vma;
+struct xe_userptr_vma;
+
+/**
+ * struct xe_userptr_vm - User pointer VM level state
+ */
+struct xe_userptr_vm {
+ /**
+ * @repin_list: list of VMAs which are user pointers,
+ * and need repinning. Protected by vm->lock.
+ */
+ struct list_head repin_list;
+ /**
+ * @invalidated_lock: Protects the
+ * @invalidated list.
+ */
+ spinlock_t invalidated_lock;
+ /**
+ * @invalidated: List of invalidated userptrs, not yet
+ * picked up for revalidation. Protected from access with the
+ * @invalidated_lock. Removing items from the list
+ * additionally requires vm->lock in write mode, and adding
+ * items to the list requires either the
+ * vm->svm.gpusvm.notifier_lock in write mode, OR vm->lock in
+ * write mode.
+ */
+ struct list_head invalidated;
+};
+
+/**
+ * struct xe_userptr - User pointer
+ */
+struct xe_userptr {
+ /** @invalidate_link: Link for the vm::userptr.invalidated list */
+ struct list_head invalidate_link;
+ /** @repin_link: Link for the vm::userptr.repin_list list */
+ struct list_head repin_link;
+ /**
+ * @pages: gpusvm pages for this user pointer.
+ */
+ struct drm_gpusvm_pages pages;
+ /**
+ * @notifier: MMU notifier for user pointer (invalidation call back)
+ */
+ struct mmu_interval_notifier notifier;
+
+ /**
+ * @initial_bind: user pointer has been bound at least once.
+ * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held.
+ * read: vm->svm.gpusvm.notifier_lock in write mode or vm->resv held.
+ */
+ bool initial_bind;
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+ u32 divisor; /* NOTE(review): only present with the invalidation-injection option; its use is not visible here */
+#endif
+};
+
+/*
+ * Userptr entry points. Without CONFIG_DRM_GPUSVM the stubs below are used:
+ * setting up, pinning or checking a single userptr vma fails with -ENODEV,
+ * the VM level helpers report nothing to repin, and the teardown helpers do
+ * nothing.
+ */
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+void xe_userptr_remove(struct xe_userptr_vma *uvma);
+int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start,
+		     unsigned long range);
+void xe_userptr_destroy(struct xe_userptr_vma *uvma);
+
+int xe_vm_userptr_pin(struct xe_vm *vm);
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
+int xe_vm_userptr_check_repin(struct xe_vm *vm);
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
+#else
+static inline void xe_userptr_remove(struct xe_userptr_vma *uvma) {}
+
+static inline int xe_userptr_setup(struct xe_userptr_vma *uvma,
+				   unsigned long start, unsigned long range)
+{
+	return -ENODEV;
+}
+
+static inline void xe_userptr_destroy(struct xe_userptr_vma *uvma) {}
+
+static inline int xe_vm_userptr_pin(struct xe_vm *vm) { return 0; }
+static inline int __xe_vm_userptr_needs_repin(struct xe_vm *vm) { return 0; }
+static inline int xe_vm_userptr_check_repin(struct xe_vm *vm) { return 0; }
+static inline int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { return -ENODEV; }
+static inline int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) { return -ENODEV; }
+#endif
+
+/* Forced invalidation is a no-op unless CONFIG_DRM_XE_USERPTR_INVAL_INJECT is enabled. */
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
+#else
+static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
+{
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_validation.c b/drivers/gpu/drm/xe/xe_validation.c
new file mode 100644
index 000000000000..826cd09966ef
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_validation.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#include "xe_bo.h"
+#include <drm/drm_exec.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gpuvm.h>
+
+#include "xe_assert.h"
+#include "xe_validation.h"
+
+#ifdef CONFIG_DRM_XE_DEBUG
+/**
+ * xe_validation_assert_exec() - Assert that the drm_exec pointer is suitable
+ * for validation.
+ * @xe: Pointer to the xe device.
+ * @exec: The drm_exec pointer to check.
+ * @obj: Pointer to the object subject to validation.
+ *
+ * NULL exec pointers are not allowed.
+ * For XE_VALIDATION_UNIMPLEMENTED, no checking.
+ * For XE_VALIDATION_OPT_OUT, check that the caller is a kunit test
+ * (only recognized when CONFIG_KUNIT is enabled).
+ * For XE_VALIDATION_UNSUPPORTED, check that the object subject to
+ * validation is a dma-buf, for which support for ww locking is
+ * not in place in the dma-buf layer.
+ * Any other error pointer value asserts.
+ */
+void xe_validation_assert_exec(const struct xe_device *xe,
+ const struct drm_exec *exec,
+ const struct drm_gem_object *obj)
+{
+ xe_assert(xe, exec);
+ if (IS_ERR(exec)) {
+ switch (PTR_ERR(exec)) {
+ case __XE_VAL_UNIMPLEMENTED:
+ break;
+ case __XE_VAL_UNSUPPORTED:
+ xe_assert(xe, !!obj->dma_buf);
+ break;
+#if IS_ENABLED(CONFIG_KUNIT)
+ case __XE_VAL_OPT_OUT:
+ xe_assert(xe, current->kunit_test);
+ break;
+#endif
+ default:
+ xe_assert(xe, false);
+ }
+ }
+}
+#endif
+
+/*
+ * Take the validation lock in the mode requested by @ctx, honouring the
+ * interruptible flag, and record the resulting lock state in @ctx.
+ * Returns 0 on success or the error from the interruptible/killable lock.
+ */
+static int xe_validation_lock(struct xe_validation_ctx *ctx)
+{
+	struct xe_validation_device *val = ctx->val;
+	int err = 0;
+
+	if (!ctx->val_flags.interruptible) {
+		if (ctx->request_exclusive)
+			down_write(&val->lock);
+		else
+			down_read(&val->lock);
+	} else if (ctx->request_exclusive) {
+		err = down_write_killable(&val->lock);
+	} else {
+		err = down_read_interruptible(&val->lock);
+	}
+
+	if (err)
+		return err;
+
+	ctx->lock_held = true;
+	ctx->lock_held_exclusive = ctx->request_exclusive;
+
+	return 0;
+}
+
+/*
+ * Non-blocking variant of xe_validation_lock(): returns 0 with the lock state
+ * recorded in @ctx, or -EWOULDBLOCK if the lock could not be taken at once.
+ */
+static int xe_validation_trylock(struct xe_validation_ctx *ctx)
+{
+	struct xe_validation_device *val = ctx->val;
+	bool locked = ctx->request_exclusive ?
+		down_write_trylock(&val->lock) :
+		down_read_trylock(&val->lock);
+
+	if (!locked)
+		return -EWOULDBLOCK;
+
+	ctx->lock_held = true;
+	ctx->lock_held_exclusive = ctx->request_exclusive;
+
+	return 0;
+}
+
+/* Drop the validation lock in the mode it was taken; no-op if not held. */
+static void xe_validation_unlock(struct xe_validation_ctx *ctx)
+{
+	if (ctx->lock_held) {
+		if (ctx->lock_held_exclusive)
+			up_write(&ctx->val->lock);
+		else
+			up_read(&ctx->val->lock);
+
+		ctx->lock_held = false;
+	}
+}
+
+/**
+ * xe_validation_ctx_init() - Initialize an xe_validation_ctx
+ * @ctx: The xe_validation_ctx to initialize.
+ * @val: The xe_validation_device representing the validation domain.
+ * @exec: The struct drm_exec to use for the transaction. May be NULL.
+ * @flags: The flags to use for initialization.
+ *
+ * Initialize and lock an xe_validation transaction using the validation domain
+ * represented by @val. Also initialize the drm_exec object forwarding parts of
+ * @flags to the drm_exec initialization. The @flags.exclusive flag should
+ * typically be set to false to avoid locking out other validators from the
+ * domain until an OOM is hit. For testing- or final attempt purposes it can,
+ * however, be set to true.
+ *
+ * Return: %0 on success, %-EINTR if interruptible initial locking failed with a
+ * signal pending. If @flags.no_block is set to true, a failed trylock
+ * returns %-EWOULDBLOCK.
+ */
+int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val,
+ struct drm_exec *exec, const struct xe_val_flags flags)
+{
+ int ret;
+
+ ctx->exec = exec;
+ ctx->val = val;
+ ctx->lock_held = false;
+ ctx->lock_held_exclusive = false;
+ ctx->request_exclusive = flags.exclusive;
+ ctx->val_flags = flags;
+ ctx->exec_flags = 0;
+ ctx->nr = 0;
+
+ if (flags.no_block)
+ ret = xe_validation_trylock(ctx);
+ else
+ ret = xe_validation_lock(ctx);
+ if (ret)
+ return ret; /* the drm_exec is left uninitialized when locking fails */
+
+ if (exec) {
+ if (flags.interruptible)
+ ctx->exec_flags |= DRM_EXEC_INTERRUPTIBLE_WAIT;
+ if (flags.exec_ignore_duplicates)
+ ctx->exec_flags |= DRM_EXEC_IGNORE_DUPLICATES;
+ drm_exec_init(exec, ctx->exec_flags, ctx->nr);
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+/*
+ * This abuses both drm_exec and ww_mutex internals and should be
+ * replaced by checking for -EDEADLK when we can make TTM
+ * stop converting -EDEADLK to -ENOMEM.
+ * An alternative is to not have exhaustive eviction with
+ * CONFIG_DEBUG_WW_MUTEX_SLOWPATH until that happens.
+ *
+ * Returns true if the ww ticket of @exec has a contending lock recorded.
+ * Without CONFIG_DEBUG_WW_MUTEX_SLOWPATH the stub always returns false.
+ */
+static bool xe_validation_contention_injected(struct drm_exec *exec)
+{
+ return !!exec->ticket.contending_lock;
+}
+
+#else
+
+static bool xe_validation_contention_injected(struct drm_exec *exec)
+{
+ return false;
+}
+
+#endif
+
+/*
+ * Decide whether a transaction that failed with @ret should be retried.
+ * Only -ENOMEM is retried: always when the lock was not yet requested in
+ * exclusive mode, and in exclusive mode only if contention was injected.
+ * A retry always requests the exclusive lock.
+ */
+static bool __xe_validation_should_retry(struct xe_validation_ctx *ctx, int ret)
+{
+	if (ret != -ENOMEM)
+		return false;
+
+	if (ctx->request_exclusive &&
+	    !xe_validation_contention_injected(ctx->exec))
+		return false;
+
+	ctx->request_exclusive = true;
+
+	return true;
+}
+
+/**
+ * xe_validation_exec_lock() - Perform drm_gpuvm_exec_lock within a validation
+ * transaction.
+ * @ctx: An uninitialized xe_validation_ctx.
+ * @vm_exec: An initialized struct drm_gpuvm_exec.
+ * @val: The validation domain.
+ *
+ * The drm_gpuvm_exec_lock() function internally initializes its drm_exec
+ * transaction and therefore doesn't lend itself very well to be using
+ * xe_validation_ctx_init(). Provide a helper that takes an uninitialized
+ * xe_validation_ctx and calls drm_gpuvm_exec_lock() with OOM retry.
+ *
+ * The interruptible and ignore-duplicates settings are taken from
+ * @vm_exec->flags. If drm_gpuvm_exec_lock() fails, the validation lock is
+ * dropped again before retrying or returning.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+int xe_validation_exec_lock(struct xe_validation_ctx *ctx,
+ struct drm_gpuvm_exec *vm_exec,
+ struct xe_validation_device *val)
+{
+ int ret;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->exec = &vm_exec->exec;
+ ctx->exec_flags = vm_exec->flags;
+ ctx->val = val;
+ if (ctx->exec_flags & DRM_EXEC_INTERRUPTIBLE_WAIT)
+ ctx->val_flags.interruptible = 1;
+ if (ctx->exec_flags & DRM_EXEC_IGNORE_DUPLICATES)
+ ctx->val_flags.exec_ignore_duplicates = 1;
+retry:
+ ret = xe_validation_lock(ctx);
+ if (ret)
+ return ret;
+
+ ret = drm_gpuvm_exec_lock(vm_exec);
+ if (ret) {
+ xe_validation_unlock(ctx);
+ if (__xe_validation_should_retry(ctx, ret))
+ goto retry; /* request_exclusive is now set, so the relock is exclusive */
+ }
+
+ return ret;
+}
+
+/**
+ * xe_validation_ctx_fini() - Finalize a validation transaction
+ * @ctx: The Validation transaction to finalize.
+ *
+ * Finalize a validation transaction and its related drm_exec transaction.
+ * The drm_exec is finalized first, if the transaction has one, and the
+ * validation lock is then dropped if it is held.
+ */
+void xe_validation_ctx_fini(struct xe_validation_ctx *ctx)
+{
+ if (ctx->exec)
+ drm_exec_fini(ctx->exec);
+ xe_validation_unlock(ctx);
+}
+
+/**
+ * xe_validation_should_retry() - Determine if a validation transaction should retry
+ * @ctx: The validation transaction.
+ * @ret: Pointer to a return value variable.
+ *
+ * Determines whether a validation transaction should retry based on the
+ * internal transaction state and the return value pointed to by @ret.
+ * If a validation should be retried, the transaction is prepared for that,
+ * and the validation lock might be re-locked in exclusive mode, and *@ret
+ * is set to %0. If the re-locking errors, typically due to interruptible
+ * locking with signal pending, *@ret is instead set to -EINTR and the
+ * function returns %false.
+ *
+ * NOTE(review): unlike xe_validation_ctx_fini(), this path uses ctx->exec
+ * without a NULL check - confirm that callers never retry a transaction
+ * that was initialized without a drm_exec.
+ *
+ * Return: %true if validation should be retried, %false otherwise.
+ */
+bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret)
+{
+ if (__xe_validation_should_retry(ctx, *ret)) {
+ drm_exec_fini(ctx->exec);
+ *ret = 0;
+ if (ctx->request_exclusive != ctx->lock_held_exclusive) {
+ xe_validation_unlock(ctx);
+ *ret = xe_validation_lock(ctx);
+ }
+ drm_exec_init(ctx->exec, ctx->exec_flags, ctx->nr); /* re-initialized even if the relock failed */
+ return !*ret;
+ }
+
+ return false;
+}
diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h
new file mode 100644
index 000000000000..fec331d791e7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_validation.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#ifndef _XE_VALIDATION_H_
+#define _XE_VALIDATION_H_
+
+#include <linux/dma-resv.h>
+#include <linux/types.h>
+#include <linux/rwsem.h>
+
+struct drm_exec;
+struct drm_gem_object;
+struct drm_gpuvm_exec;
+struct xe_device;
+
+#ifdef CONFIG_PROVE_LOCKING
+/**
+ * xe_validation_lockdep() - Assert that a drm_exec locking transaction can
+ * be initialized at this point.
+ *
+ * Initializes and immediately finalizes an on-stack struct ww_acquire_ctx
+ * for the reservation_ww_class. Without CONFIG_PROVE_LOCKING this function
+ * is an empty inline stub.
+ */
+static inline void xe_validation_lockdep(void)
+{
+ struct ww_acquire_ctx ticket;
+
+ ww_acquire_init(&ticket, &reservation_ww_class);
+ ww_acquire_fini(&ticket);
+}
+#else
+static inline void xe_validation_lockdep(void)
+{
+}
+#endif
+
+/*
+ * Various values of the drm_exec pointer where we've not (yet)
+ * implemented full ww locking. Each value is an errno encoded with
+ * ERR_PTR() and cast to struct drm_exec *, so none of them may be
+ * dereferenced.
+ *
+ * XE_VALIDATION_UNIMPLEMENTED means implementation is pending.
+ * A lockdep check is made to ensure that a drm_exec locking
+ * transaction can actually take place where the macro is
+ * used. If this asserts, the exec pointer needs to be assigned
+ * higher up in the callchain and passed down.
+ *
+ * XE_VALIDATION_UNSUPPORTED is for dma-buf code only where
+ * the dma-buf layer doesn't support WW locking. No lockdep check
+ * is made for this value.
+ *
+ * XE_VALIDATION_OPT_OUT is for simplification of kunit tests where
+ * exhaustive eviction isn't necessary. The same lockdep check as for
+ * XE_VALIDATION_UNIMPLEMENTED is made.
+ *
+ * xe_validation_assert_exec() is a real function only with
+ * CONFIG_DRM_XE_DEBUG. Otherwise it expands to a statement that merely
+ * casts its arguments to void.
+ */
+#define __XE_VAL_UNIMPLEMENTED -EINVAL
+#define XE_VALIDATION_UNIMPLEMENTED (xe_validation_lockdep(), \
+ (struct drm_exec *)ERR_PTR(__XE_VAL_UNIMPLEMENTED))
+
+#define __XE_VAL_UNSUPPORTED -EOPNOTSUPP
+#define XE_VALIDATION_UNSUPPORTED ((struct drm_exec *)ERR_PTR(__XE_VAL_UNSUPPORTED))
+
+#define __XE_VAL_OPT_OUT -ENOMEM
+#define XE_VALIDATION_OPT_OUT (xe_validation_lockdep(), \
+ (struct drm_exec *)ERR_PTR(__XE_VAL_OPT_OUT))
+#ifdef CONFIG_DRM_XE_DEBUG
+void xe_validation_assert_exec(const struct xe_device *xe, const struct drm_exec *exec,
+ const struct drm_gem_object *obj);
+#else
+#define xe_validation_assert_exec(_xe, _exec, _obj) \
+ do { \
+ (void)_xe; (void)_exec; (void)_obj; \
+ } while (0)
+#endif
+
+/**
+ * struct xe_validation_device - The domain for exhaustive eviction
+ * @lock: The lock used to exclude other processes from allocating graphics memory.
+ * Initialized by xe_validation_device_init().
+ *
+ * The struct xe_validation_device represents the domain for which we want to use
+ * exhaustive eviction. The @lock is typically grabbed in read mode for allocations
+ * but when graphics memory allocation fails, it is retried with the write mode held.
+ */
+struct xe_validation_device {
+ struct rw_semaphore lock;
+};
+
+/**
+ * struct xe_val_flags - Flags for xe_validation_ctx_init().
+ * @exclusive: Start the validation transaction by locking out all other validators.
+ * @no_block: Don't block on initialization.
+ * @interruptible: Block interruptibly if blocking. Implies initializing the drm_exec
+ * context with the DRM_EXEC_INTERRUPTIBLE_WAIT flag.
+ * @exec_ignore_duplicates: Initialize the drm_exec context with the
+ * DRM_EXEC_IGNORE_DUPLICATES flag.
+ *
+ * Each flag is a single-bit bitfield.
+ */
+struct xe_val_flags {
+ u32 exclusive :1;
+ u32 no_block :1;
+ u32 interruptible :1;
+ u32 exec_ignore_duplicates :1;
+};
+
+/**
+ * struct xe_validation_ctx - A struct drm_exec subclass with support for
+ * exhaustive eviction
+ * @exec: The drm_exec object base class. Note that we use a pointer instead of
+ * embedding to avoid diamond inheritance.
+ * @val: The exhaustive eviction domain.
+ * @val_flags: Copy of the struct xe_val_flags passed to xe_validation_ctx_init().
+ * @lock_held: Whether the domain lock is currently held.
+ * @lock_held_exclusive: Whether the domain lock is held in exclusive mode.
+ * @request_exclusive: Whether to lock exclusively (write mode) the next time
+ * the domain lock is locked.
+ * @exec_flags: The drm_exec flags used for drm_exec (re-)initialization.
+ * @nr: The drm_exec nr parameter used for drm_exec (re-)initialization.
+ */
+struct xe_validation_ctx {
+ struct drm_exec *exec;
+ struct xe_validation_device *val;
+ struct xe_val_flags val_flags;
+ bool lock_held;
+ bool lock_held_exclusive;
+ bool request_exclusive;
+ u32 exec_flags;
+ unsigned int nr;
+};
+
+int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val,
+ struct drm_exec *exec, const struct xe_val_flags flags);
+
+int xe_validation_exec_lock(struct xe_validation_ctx *ctx, struct drm_gpuvm_exec *vm_exec,
+ struct xe_validation_device *val);
+
+void xe_validation_ctx_fini(struct xe_validation_ctx *ctx);
+
+bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret);
+
+/**
+ * xe_validation_retry_on_oom() - Retry on oom in an xe_validation transaction
+ * @_ctx: Pointer to the xe_validation_ctx
+ * @_ret: Pointer to the current error value, possibly holding -ENOMEM
+ *
+ * Use this in a way similar to drm_exec_retry_on_contention().
+ * If *@_ret contains -ENOMEM the transaction is restarted once in a way that
+ * blocks other transactions and allows exhaustive eviction. If the transaction
+ * was already restarted once, just return the -ENOMEM. May also set
+ * *@_ret to -EINTR if not retrying and waits are interruptible.
+ * May only be used within a drm_exec_until_all_locked() loop, since the
+ * restart is done by jumping through __drm_exec_retry_ptr, which this macro
+ * doesn't define itself.
+ */
+#define xe_validation_retry_on_oom(_ctx, _ret) \
+ do { \
+ if (xe_validation_should_retry(_ctx, _ret)) \
+ goto *__drm_exec_retry_ptr; \
+ } while (0)
+
+/**
+ * xe_validation_device_init() - Initialize a struct xe_validation_device
+ * @val: The xe_validation_device to init.
+ *
+ * Initializes @val->lock using init_rwsem().
+ */
+static inline void
+xe_validation_device_init(struct xe_validation_device *val)
+{
+ init_rwsem(&val->lock);
+}
+
+/*
+ * Make guard() and scoped_guard() work with xe_validation_ctx
+ * so that we can exit transactions without caring about the
+ * cleanup.
+ *
+ * The constructor calls xe_validation_ctx_init() and stores its return
+ * value in _ret. On error the guard value is NULL instead of _ctx, and
+ * since the destructor skips NULL values, xe_validation_ctx_fini() is
+ * then not called. The class is marked as conditional, with
+ * class_xe_validation_lock_ptr() returning the guard value.
+ */
+DEFINE_CLASS(xe_validation, struct xe_validation_ctx *,
+ if (_T) xe_validation_ctx_fini(_T);,
+ ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags);
+ _ret ? NULL : _ctx; }),
+ struct xe_validation_ctx *_ctx, struct xe_validation_device *_val,
+ struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret);
+static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T)
+{return *_T; }
+#define class_xe_validation_is_conditional true
+
+/**
+ * xe_validation_guard() - An auto-cleanup xe_validation_ctx transaction
+ * @_ctx: The xe_validation_ctx.
+ * @_val: The xe_validation_device.
+ * @_exec: The struct drm_exec object
+ * @_flags: Flags for the xe_validation_ctx initialization.
+ * @_ret: Return in / out parameter. May be set by this macro. Typically 0 when called.
+ *
+ * This macro will initiate a drm_exec transaction with additional support for
+ * exhaustive eviction. It expands to a scoped_guard() of the xe_validation
+ * class followed by drm_exec_until_all_locked(@_exec), so the statement
+ * following the macro invocation becomes the body of the drm_exec locking loop.
+ */
+#define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \
+ scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \
+ drm_exec_until_all_locked(_exec)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index db6b9a6651b7..0cacab20ff85 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -41,7 +41,6 @@
#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
-#include "xe_hmm.h"
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
@@ -49,34 +48,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
}
/**
- * xe_vma_userptr_check_repin() - Advisory check for repin needed
- * @uvma: The userptr vma
+ * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
+ * @vm: The vm whose resv is to be locked.
+ * @exec: The drm_exec transaction.
*
- * Check if the userptr vma has been invalidated since last successful
- * repin. The check is advisory only and can the function can be called
- * without the vm->userptr.notifier_lock held. There is no guarantee that the
- * vma userptr will remain valid after a lockless check, so typically
- * the call needs to be followed by a proper check under the notifier_lock.
+ * Helper to lock the vm's resv as part of a drm_exec transaction.
*
- * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ * Return: %0 on success. See drm_exec_lock_obj() for error codes.
*/
-int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
+int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
{
- return mmu_interval_check_retry(&uvma->userptr.notifier,
- uvma->userptr.notifier_seq) ?
- -EAGAIN : 0;
-}
-
-int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
-{
- struct xe_vma *vma = &uvma->vma;
- struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_device *xe = vm->xe;
-
- lockdep_assert_held(&vm->lock);
- xe_assert(xe, xe_vma_is_userptr(vma));
-
- return xe_hmm_userptr_populate_range(uvma, false);
+ return drm_exec_lock_obj(exec, xe_vm_obj(vm));
}
static bool preempt_fences_waiting(struct xe_vm *vm)
@@ -228,6 +210,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
.num_fences = 1,
};
struct drm_exec *exec = &vm_exec.exec;
+ struct xe_validation_ctx ctx;
struct dma_fence *pfence;
int err;
bool wait;
@@ -235,7 +218,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
down_write(&vm->lock);
- err = drm_gpuvm_exec_lock(&vm_exec);
+ err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
if (err)
goto out_up_write;
@@ -250,7 +233,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
++vm->preempt.num_exec_queues;
q->lr.pfence = pfence;
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
@@ -264,10 +247,10 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
if (wait)
dma_fence_enable_sw_signaling(pfence);
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
out_fini:
- drm_exec_fini(exec);
+ xe_validation_ctx_fini(&ctx);
out_up_write:
up_write(&vm->lock);
@@ -300,25 +283,6 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
up_write(&vm->lock);
}
-/**
- * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
- * that need repinning.
- * @vm: The VM.
- *
- * This function checks for whether the VM has userptrs that need repinning,
- * and provides a release-type barrier on the userptr.notifier_lock after
- * checking.
- *
- * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
- */
-int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
-{
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
-
- return (list_empty(&vm->userptr.repin_list) &&
- list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
-}
-
#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
/**
@@ -350,39 +314,6 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked)
/* TODO: Inform user the VM is banned */
}
-/**
- * xe_vm_validate_should_retry() - Whether to retry after a validate error.
- * @exec: The drm_exec object used for locking before validation.
- * @err: The error returned from ttm_bo_validate().
- * @end: A ktime_t cookie that should be set to 0 before first use and
- * that should be reused on subsequent calls.
- *
- * With multiple active VMs, under memory pressure, it is possible that
- * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
- * Until ttm properly handles locking in such scenarios, best thing the
- * driver can do is retry with a timeout. Check if that is necessary, and
- * if so unlock the drm_exec's objects while keeping the ticket to prepare
- * for a rerun.
- *
- * Return: true if a retry after drm_exec_init() is recommended;
- * false otherwise.
- */
-bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
-{
- ktime_t cur;
-
- if (err != -ENOMEM)
- return false;
-
- cur = ktime_get();
- *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
- if (!ktime_before(cur, *end))
- return false;
-
- msleep(20);
- return true;
-}
-
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
@@ -397,7 +328,7 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
if (!try_wait_for_completion(&vm->xe->pm_block))
return -EAGAIN;
- ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
+ ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
if (ret)
return ret;
@@ -513,10 +444,10 @@ void xe_vm_resume_rebind_worker(struct xe_vm *vm)
static void preempt_rebind_work_func(struct work_struct *w)
{
struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
unsigned int fence_count = 0;
LIST_HEAD(preempt_fences);
- ktime_t end = 0;
int err = 0;
long wait;
int __maybe_unused tries = 0;
@@ -544,18 +475,19 @@ retry:
goto out_unlock_outer;
}
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
+ (struct xe_val_flags) {.interruptible = true});
+ if (err)
+ goto out_unlock_outer;
drm_exec_until_all_locked(&exec) {
bool done = false;
err = xe_preempt_work_begin(&exec, vm, &done);
drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
if (err || done) {
- drm_exec_fini(&exec);
- if (err && xe_vm_validate_should_retry(&exec, err, &end))
- err = -EAGAIN;
-
+ xe_validation_ctx_fini(&ctx);
goto out_unlock_outer;
}
}
@@ -564,7 +496,9 @@ retry:
if (err)
goto out_unlock;
+ xe_vm_set_validation_exec(vm, &exec);
err = xe_vm_rebind(vm, true);
+ xe_vm_set_validation_exec(vm, NULL);
if (err)
goto out_unlock;
@@ -582,9 +516,9 @@ retry:
(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
__xe_vm_userptr_needs_repin(__vm))
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
if (retry_required(tries, vm)) {
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
err = -EAGAIN;
goto out_unlock;
}
@@ -598,10 +532,10 @@ retry:
/* Point of no return. */
arm_preempt_fences(vm, &preempt_fences);
resume_and_reinstall_preempt_fences(vm, &exec);
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
out_unlock:
- drm_exec_fini(&exec);
+ xe_validation_ctx_fini(&ctx);
out_unlock_outer:
if (err == -EAGAIN) {
trace_xe_vm_rebind_worker_retry(vm);
@@ -619,203 +553,6 @@ out_unlock_outer:
trace_xe_vm_rebind_worker_exit(vm);
}
-static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
- struct xe_vma *vma = &uvma->vma;
- struct dma_resv_iter cursor;
- struct dma_fence *fence;
- long err;
-
- /*
- * Tell exec and rebind worker they need to repin and rebind this
- * userptr.
- */
- if (!xe_vm_in_fault_mode(vm) &&
- !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
- spin_lock(&vm->userptr.invalidated_lock);
- list_move_tail(&userptr->invalidate_link,
- &vm->userptr.invalidated);
- spin_unlock(&vm->userptr.invalidated_lock);
- }
-
- /*
- * Preempt fences turn into schedule disables, pipeline these.
- * Note that even in fault mode, we need to wait for binds and
- * unbinds to complete, and those are attached as BOOKMARK fences
- * to the vm.
- */
- dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP);
- dma_resv_for_each_fence_unlocked(&cursor, fence)
- dma_fence_enable_sw_signaling(fence);
- dma_resv_iter_end(&cursor);
-
- err = dma_resv_wait_timeout(xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP,
- false, MAX_SCHEDULE_TIMEOUT);
- XE_WARN_ON(err <= 0);
-
- if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
- err = xe_vm_invalidate_vma(vma);
- XE_WARN_ON(err);
- }
-
- xe_hmm_userptr_unmap(uvma);
-}
-
-static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
- const struct mmu_notifier_range *range,
- unsigned long cur_seq)
-{
- struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
- struct xe_vma *vma = &uvma->vma;
- struct xe_vm *vm = xe_vma_vm(vma);
-
- xe_assert(vm->xe, xe_vma_is_userptr(vma));
- trace_xe_vma_userptr_invalidate(vma);
-
- if (!mmu_notifier_range_blockable(range))
- return false;
-
- vm_dbg(&xe_vma_vm(vma)->xe->drm,
- "NOTIFIER: addr=0x%016llx, range=0x%016llx",
- xe_vma_start(vma), xe_vma_size(vma));
-
- down_write(&vm->userptr.notifier_lock);
- mmu_interval_set_seq(mni, cur_seq);
-
- __vma_userptr_invalidate(vm, uvma);
- up_write(&vm->userptr.notifier_lock);
- trace_xe_vma_userptr_invalidate_complete(vma);
-
- return true;
-}
-
-static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
- .invalidate = vma_userptr_invalidate,
-};
-
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
-/**
- * xe_vma_userptr_force_invalidate() - force invalidate a userptr
- * @uvma: The userptr vma to invalidate
- *
- * Perform a forced userptr invalidation for testing purposes.
- */
-void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
-{
- struct xe_vm *vm = xe_vma_vm(&uvma->vma);
-
- /* Protect against concurrent userptr pinning */
- lockdep_assert_held(&vm->lock);
- /* Protect against concurrent notifiers */
- lockdep_assert_held(&vm->userptr.notifier_lock);
- /*
- * Protect against concurrent instances of this function and
- * the critical exec sections
- */
- xe_vm_assert_held(vm);
-
- if (!mmu_interval_read_retry(&uvma->userptr.notifier,
- uvma->userptr.notifier_seq))
- uvma->userptr.notifier_seq -= 2;
- __vma_userptr_invalidate(vm, uvma);
-}
-#endif
-
-int xe_vm_userptr_pin(struct xe_vm *vm)
-{
- struct xe_userptr_vma *uvma, *next;
- int err = 0;
-
- xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
- lockdep_assert_held_write(&vm->lock);
-
- /* Collect invalidated userptrs */
- spin_lock(&vm->userptr.invalidated_lock);
- xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
- list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
- userptr.invalidate_link) {
- list_del_init(&uvma->userptr.invalidate_link);
- list_add_tail(&uvma->userptr.repin_link,
- &vm->userptr.repin_list);
- }
- spin_unlock(&vm->userptr.invalidated_lock);
-
- /* Pin and move to bind list */
- list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
- userptr.repin_link) {
- err = xe_vma_userptr_pin_pages(uvma);
- if (err == -EFAULT) {
- list_del_init(&uvma->userptr.repin_link);
- /*
- * We might have already done the pin once already, but
- * then had to retry before the re-bind happened, due
- * some other condition in the caller, but in the
- * meantime the userptr got dinged by the notifier such
- * that we need to revalidate here, but this time we hit
- * the EFAULT. In such a case make sure we remove
- * ourselves from the rebind list to avoid going down in
- * flames.
- */
- if (!list_empty(&uvma->vma.combined_links.rebind))
- list_del_init(&uvma->vma.combined_links.rebind);
-
- /* Wait for pending binds */
- xe_vm_lock(vm, false);
- dma_resv_wait_timeout(xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP,
- false, MAX_SCHEDULE_TIMEOUT);
-
- down_read(&vm->userptr.notifier_lock);
- err = xe_vm_invalidate_vma(&uvma->vma);
- up_read(&vm->userptr.notifier_lock);
- xe_vm_unlock(vm);
- if (err)
- break;
- } else {
- if (err)
- break;
-
- list_del_init(&uvma->userptr.repin_link);
- list_move_tail(&uvma->vma.combined_links.rebind,
- &vm->rebind_list);
- }
- }
-
- if (err) {
- down_write(&vm->userptr.notifier_lock);
- spin_lock(&vm->userptr.invalidated_lock);
- list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
- userptr.repin_link) {
- list_del_init(&uvma->userptr.repin_link);
- list_move_tail(&uvma->userptr.invalidate_link,
- &vm->userptr.invalidated);
- }
- spin_unlock(&vm->userptr.invalidated_lock);
- up_write(&vm->userptr.notifier_lock);
- }
- return err;
-}
-
-/**
- * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
- * that need repinning.
- * @vm: The VM.
- *
- * This function does an advisory check for whether the VM has userptrs that
- * need repinning.
- *
- * Return: 0 if there are no indications of userptrs needing repinning,
- * -EAGAIN if there are.
- */
-int xe_vm_userptr_check_repin(struct xe_vm *vm)
-{
- return (list_empty_careful(&vm->userptr.repin_list) &&
- list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
-}
-
static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
{
int i;
@@ -1280,25 +1017,17 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
drm_gpuvm_bo_put(vm_bo);
} else /* userptr or null */ {
if (!is_null && !is_cpu_addr_mirror) {
- struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
+ struct xe_userptr_vma *uvma = to_userptr_vma(vma);
u64 size = end - start + 1;
int err;
- INIT_LIST_HEAD(&userptr->invalidate_link);
- INIT_LIST_HEAD(&userptr->repin_link);
vma->gpuva.gem.offset = bo_offset_or_userptr;
- mutex_init(&userptr->unmap_mutex);
- err = mmu_interval_notifier_insert(&userptr->notifier,
- current->mm,
- xe_vma_userptr(vma), size,
- &vma_userptr_notifier_ops);
+ err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
if (err) {
xe_vma_free(vma);
return ERR_PTR(err);
}
-
- userptr->notifier_seq = LONG_MAX;
}
xe_vm_get(vm);
@@ -1318,18 +1047,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
if (xe_vma_is_userptr(vma)) {
struct xe_userptr_vma *uvma = to_userptr_vma(vma);
- struct xe_userptr *userptr = &uvma->userptr;
-
- if (userptr->sg)
- xe_hmm_userptr_free_sg(uvma);
- /*
- * Since userptr pages are not pinned, we can't remove
- * the notifier until we're sure the GPU is not accessing
- * them anymore
- */
- mmu_interval_notifier_remove(&userptr->notifier);
- mutex_destroy(&userptr->unmap_mutex);
+ xe_userptr_remove(uvma);
xe_vm_put(vm);
} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
xe_vm_put(vm);
@@ -1366,11 +1085,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
if (xe_vma_is_userptr(vma)) {
xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
-
- spin_lock(&vm->userptr.invalidated_lock);
- xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
- list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
- spin_unlock(&vm->userptr.invalidated_lock);
+ xe_userptr_destroy(to_userptr_vma(vma));
} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
xe_bo_assert_held(xe_vma_bo(vma));
@@ -1418,20 +1133,19 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
+ struct xe_device *xe = xe_vma_vm(vma)->xe;
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
- int err;
+ int err = 0;
- drm_exec_init(&exec, 0, 0);
- drm_exec_until_all_locked(&exec) {
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
err = xe_vm_lock_vma(&exec, vma);
drm_exec_retry_on_contention(&exec);
if (XE_WARN_ON(err))
break;
+ xe_vma_destroy(vma, NULL);
}
-
- xe_vma_destroy(vma, NULL);
-
- drm_exec_fini(&exec);
+ xe_assert(xe, !err);
}
struct xe_vma *
@@ -1656,6 +1370,7 @@ static void vm_destroy_work_func(struct work_struct *w);
* @xe: xe device.
* @tile: tile to set up for.
* @vm: vm to set up for.
+ * @exec: The struct drm_exec object used to lock the vm resv.
*
* Sets up a pagetable tree with one page-table per level and a single
* leaf PTE. All pagetable entries point to the single page-table or,
@@ -1665,20 +1380,19 @@ static void vm_destroy_work_func(struct work_struct *w);
* Return: 0 on success, negative error code on error.
*/
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm)
+ struct xe_vm *vm, struct drm_exec *exec)
{
u8 id = tile->id;
int i;
for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
- vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
+ vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
if (IS_ERR(vm->scratch_pt[id][i])) {
int err = PTR_ERR(vm->scratch_pt[id][i]);
vm->scratch_pt[id][i] = NULL;
return err;
}
-
xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
}
@@ -1706,9 +1420,26 @@ static void xe_vm_free_scratch(struct xe_vm *vm)
}
}
+static void xe_vm_pt_destroy(struct xe_vm *vm)
+{
+ struct xe_tile *tile;
+ u8 id;
+
+ xe_vm_assert_held(vm);
+
+ for_each_tile(tile, vm->xe, id) {
+ if (vm->pt_root[id]) {
+ xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+ vm->pt_root[id] = NULL;
+ }
+ }
+}
+
struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
{
struct drm_gem_object *vm_resv_obj;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
struct xe_vm *vm;
int err, number_tiles = 0;
struct xe_tile *tile;
@@ -1752,7 +1483,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
INIT_LIST_HEAD(&vm->userptr.repin_list);
INIT_LIST_HEAD(&vm->userptr.invalidated);
- init_rwsem(&vm->userptr.notifier_lock);
spin_lock_init(&vm->userptr.invalidated_lock);
ttm_lru_bulk_move_init(&vm->lru_bulk_move);
@@ -1779,11 +1509,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
}
- if (flags & XE_VM_FLAG_FAULT_MODE) {
- err = xe_svm_init(vm);
- if (err)
- goto err_no_resv;
- }
+ err = xe_svm_init(vm);
+ if (err)
+ goto err_no_resv;
vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
if (!vm_resv_obj) {
@@ -1796,49 +1524,68 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
drm_gem_object_put(vm_resv_obj);
- err = xe_vm_lock(vm, true);
- if (err)
- goto err_close;
-
- if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
- vm->flags |= XE_VM_FLAG_64K;
-
- for_each_tile(tile, xe, id) {
- if (flags & XE_VM_FLAG_MIGRATION &&
- tile->id != XE_VM_FLAG_TILE_ID(flags))
- continue;
+ err = 0;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
- vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
- if (IS_ERR(vm->pt_root[id])) {
- err = PTR_ERR(vm->pt_root[id]);
- vm->pt_root[id] = NULL;
- goto err_unlock_close;
- }
- }
+ if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+ vm->flags |= XE_VM_FLAG_64K;
- if (xe_vm_has_scratch(vm)) {
for_each_tile(tile, xe, id) {
- if (!vm->pt_root[id])
+ if (flags & XE_VM_FLAG_MIGRATION &&
+ tile->id != XE_VM_FLAG_TILE_ID(flags))
continue;
- err = xe_vm_create_scratch(xe, tile, vm);
+ vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
+ &exec);
+ if (IS_ERR(vm->pt_root[id])) {
+ err = PTR_ERR(vm->pt_root[id]);
+ vm->pt_root[id] = NULL;
+ xe_vm_pt_destroy(vm);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+ }
+ if (err)
+ break;
+
+ if (xe_vm_has_scratch(vm)) {
+ for_each_tile(tile, xe, id) {
+ if (!vm->pt_root[id])
+ continue;
+
+ err = xe_vm_create_scratch(xe, tile, vm, &exec);
+ if (err) {
+ xe_vm_free_scratch(vm);
+ xe_vm_pt_destroy(vm);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+ }
if (err)
- goto err_unlock_close;
+ break;
+ vm->batch_invalidate_tlb = true;
}
- vm->batch_invalidate_tlb = true;
- }
- if (vm->flags & XE_VM_FLAG_LR_MODE)
- vm->batch_invalidate_tlb = false;
+ if (vm->flags & XE_VM_FLAG_LR_MODE) {
+ INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+ vm->batch_invalidate_tlb = false;
+ }
- /* Fill pt_root after allocating scratch tables */
- for_each_tile(tile, xe, id) {
- if (!vm->pt_root[id])
- continue;
+ /* Fill pt_root after allocating scratch tables */
+ for_each_tile(tile, xe, id) {
+ if (!vm->pt_root[id])
+ continue;
- xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
+ xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
+ }
}
- xe_vm_unlock(vm);
+ if (err)
+ goto err_close;
/* Kernel migration VM shouldn't have a circular loop.. */
if (!(flags & XE_VM_FLAG_MIGRATION)) {
@@ -1871,7 +1618,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
&xe->usm.next_asid, GFP_KERNEL);
up_write(&xe->usm.lock);
if (err < 0)
- goto err_unlock_close;
+ goto err_close;
vm->usm.asid = asid;
}
@@ -1880,8 +1627,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
return vm;
-err_unlock_close:
- xe_vm_unlock(vm);
err_close:
xe_vm_close_and_put(vm);
return ERR_PTR(err);
@@ -1988,9 +1733,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
vma = gpuva_to_vma(gpuva);
if (xe_vma_has_no_bo(vma)) {
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags |= XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
}
xe_vm_remove_vma(vm, vma);
@@ -2014,13 +1759,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
* destroy the pagetables immediately.
*/
xe_vm_free_scratch(vm);
-
- for_each_tile(tile, xe, id) {
- if (vm->pt_root[id]) {
- xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
- vm->pt_root[id] = NULL;
- }
- }
+ xe_vm_pt_destroy(vm);
xe_vm_unlock(vm);
/*
@@ -2034,8 +1773,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe_vma_destroy_unlocked(vma);
}
- if (xe_vm_in_fault_mode(vm))
- xe_svm_fini(vm);
+ xe_svm_fini(vm);
up_write(&vm->lock);
@@ -2328,6 +2066,8 @@ int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_
err = copy_to_user(attrs_user, mem_attrs,
args->sizeof_mem_range_attr * args->num_mem_ranges);
+ if (err)
+ err = -EFAULT;
free_mem_attrs:
kvfree(mem_attrs);
@@ -2376,9 +2116,9 @@ static const u32 region_to_mem_type[] = {
static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
bool post_commit)
{
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags |= XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
if (post_commit)
xe_vm_remove_vma(vm, vma);
}
@@ -2639,6 +2379,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
struct xe_vma_mem_attr *attr, unsigned int flags)
{
struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
struct xe_vma *vma;
int err = 0;
@@ -2646,9 +2387,9 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
lockdep_assert_held_write(&vm->lock);
if (bo) {
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
- drm_exec_until_all_locked(&exec) {
- err = 0;
+ err = 0;
+ xe_validation_guard(&ctx, &vm->xe->val, &exec,
+ (struct xe_val_flags) {.interruptible = true}, err) {
if (!bo->vm) {
err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
drm_exec_retry_on_contention(&exec);
@@ -2657,27 +2398,35 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
err = drm_exec_lock_obj(&exec, &bo->ttm.base);
drm_exec_retry_on_contention(&exec);
}
- if (err) {
- drm_exec_fini(&exec);
+ if (err)
return ERR_PTR(err);
- }
- }
- }
- vma = xe_vma_create(vm, bo, op->gem.offset,
- op->va.addr, op->va.addr +
- op->va.range - 1, attr, flags);
- if (IS_ERR(vma))
- goto err_unlock;
- if (xe_vma_is_userptr(vma))
- err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
- else if (!xe_vma_has_no_bo(vma) && !bo->vm)
- err = add_preempt_fences(vm, bo);
+ vma = xe_vma_create(vm, bo, op->gem.offset,
+ op->va.addr, op->va.addr +
+ op->va.range - 1, attr, flags);
+ if (IS_ERR(vma))
+ return vma;
-err_unlock:
- if (bo)
- drm_exec_fini(&exec);
+ if (!bo->vm) {
+ err = add_preempt_fences(vm, bo);
+ if (err) {
+ prep_vma_destroy(vm, vma, false);
+ xe_vma_destroy(vma, NULL);
+ }
+ }
+ }
+ if (err)
+ return ERR_PTR(err);
+ } else {
+ vma = xe_vma_create(vm, NULL, op->gem.offset,
+ op->va.addr, op->va.addr +
+ op->va.range - 1, attr, flags);
+ if (IS_ERR(vma))
+ return vma;
+ if (xe_vma_is_userptr(vma))
+ err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+ }
if (err) {
prep_vma_destroy(vm, vma, false);
xe_vma_destroy_unlocked(vma);
@@ -3021,9 +2770,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
if (vma) {
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags &= ~XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
if (post_commit)
xe_vm_insert_vma(vm, vma);
}
@@ -3042,9 +2791,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
xe_vma_destroy_unlocked(op->remap.next);
}
if (vma) {
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags &= ~XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
if (post_commit)
xe_vm_insert_vma(vm, vma);
}
@@ -3094,7 +2843,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
err = drm_exec_lock_obj(exec, &bo->ttm.base);
if (!err && validate)
err = xe_bo_validate(bo, vm,
- !xe_vm_in_preempt_fence_mode(vm));
+ !xe_vm_in_preempt_fence_mode(vm), exec);
}
return err;
@@ -3212,7 +2961,9 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
false);
if (!err && !xe_vma_has_no_bo(vma))
err = xe_bo_migrate(xe_vma_bo(vma),
- region_to_mem_type[region]);
+ region_to_mem_type[region],
+ NULL,
+ exec);
break;
}
default:
@@ -3475,35 +3226,37 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
struct xe_vma_ops *vops)
{
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
struct dma_fence *fence;
- int err;
+ int err = 0;
lockdep_assert_held_write(&vm->lock);
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES, 0);
- drm_exec_until_all_locked(&exec) {
+ xe_validation_guard(&ctx, &vm->xe->val, &exec,
+ ((struct xe_val_flags) {
+ .interruptible = true,
+ .exec_ignore_duplicates = true,
+ }), err) {
err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
drm_exec_retry_on_contention(&exec);
- if (err) {
- fence = ERR_PTR(err);
- goto unlock;
- }
+ xe_validation_retry_on_oom(&ctx, &err);
+ if (err)
+ return ERR_PTR(err);
+ xe_vm_set_validation_exec(vm, &exec);
fence = ops_execute(vm, vops);
+ xe_vm_set_validation_exec(vm, NULL);
if (IS_ERR(fence)) {
if (PTR_ERR(fence) == -ENODATA)
vm_bind_ioctl_ops_fini(vm, vops, NULL);
- goto unlock;
+ return fence;
}
vm_bind_ioctl_ops_fini(vm, vops, fence);
}
-unlock:
- drm_exec_fini(&exec);
- return fence;
+ return err ? ERR_PTR(err) : fence;
}
ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
@@ -3619,6 +3372,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+ XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
+ !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
XE_IOCTL_DBG(xe, obj &&
op == DRM_XE_VM_BIND_OP_PREFETCH) ||
XE_IOCTL_DBG(xe, prefetch_region &&
@@ -4054,10 +3809,14 @@ release_vm_lock:
*/
int xe_vm_lock(struct xe_vm *vm, bool intr)
{
+ int ret;
+
if (intr)
- return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+ ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+ else
+ ret = dma_resv_lock(xe_vm_resv(vm), NULL);
- return dma_resv_lock(xe_vm_resv(vm), NULL);
+ return ret;
}
/**
@@ -4164,13 +3923,13 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
*/
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
if (xe_vma_is_userptr(vma)) {
- lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) ||
- (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) &&
+ lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
+ (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
WARN_ON_ONCE(!mmu_interval_check_retry
(&to_userptr_vma(vma)->userptr.notifier,
- to_userptr_vma(vma)->userptr.notifier_seq));
+ to_userptr_vma(vma)->userptr.pages.notifier_seq));
WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
DMA_RESV_USAGE_BOOKKEEP));
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index d631c4b25c51..ef8a5019574e 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -220,12 +220,6 @@ static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm)
int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
-int xe_vm_userptr_pin(struct xe_vm *vm);
-
-int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
-
-int xe_vm_userptr_check_repin(struct xe_vm *vm);
-
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
u8 tile_mask);
@@ -266,12 +260,6 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
}
}
-int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
-
-int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
-
-bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
-
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
@@ -302,6 +290,8 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked);
*/
#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec);
+
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
#define vm_dbg drm_dbg
#else
@@ -331,7 +321,7 @@ static inline void xe_vm_set_validating(struct xe_vm *vm, bool allow_res_evict)
if (vm && !allow_res_evict) {
xe_vm_assert_held(vm);
/* Pairs with READ_ONCE in xe_vm_is_validating() */
- WRITE_ONCE(vm->validating, current);
+ WRITE_ONCE(vm->validation.validating, current);
}
}
@@ -349,7 +339,7 @@ static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict
{
if (vm && !allow_res_evict) {
/* Pairs with READ_ONCE in xe_vm_is_validating() */
- WRITE_ONCE(vm->validating, NULL);
+ WRITE_ONCE(vm->validation.validating, NULL);
}
}
@@ -367,7 +357,7 @@ static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict
static inline bool xe_vm_is_validating(struct xe_vm *vm)
{
/* Pairs with WRITE_ONCE in xe_vm_is_validating() */
- if (READ_ONCE(vm->validating) == current) {
+ if (READ_ONCE(vm->validation.validating) == current) {
xe_vm_assert_held(vm);
return true;
}
@@ -375,6 +365,34 @@ static inline bool xe_vm_is_validating(struct xe_vm *vm)
}
/**
+ * xe_vm_set_validation_exec() - Accessor to set the drm_exec object
+ * @vm: The vm we want to register a drm_exec object with.
+ * @exec: The exec object we want to register.
+ *
+ * Set the drm_exec object used to lock the vm's resv.
+ */
+static inline void xe_vm_set_validation_exec(struct xe_vm *vm, struct drm_exec *exec)
+{
+ xe_vm_assert_held(vm);
+ xe_assert(vm->xe, !!exec ^ !!vm->validation._exec);
+ vm->validation._exec = exec;
+}
+
+/**
+ * xe_vm_validation_exec() - Accessor to read the drm_exec object
+ * @vm: The vm whose registered drm_exec object we want to read.
+ *
+ * Return: The drm_exec object used to lock the vm's resv. The value
+ * is a valid pointer, %NULL, or one of the special values defined in
+ * xe_validation.h.
+ */
+static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm)
+{
+ xe_vm_assert_held(vm);
+ return vm->validation._exec;
+}
+
+/**
* xe_vm_has_valid_gpu_mapping() - Advisory helper to check if VMA or SVM range has
* a valid GPU mapping
* @tile: The tile which the GPU mapping belongs to
@@ -393,11 +411,4 @@ static inline bool xe_vm_is_validating(struct xe_vm *vm)
#define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \
((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id))
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
-void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
-#else
-static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
-{
-}
-#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 09c5783ee523..cad3cf627c3f 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -18,9 +18,8 @@ struct xe_vmas_in_madvise_range {
u64 range;
struct xe_vma **vmas;
int num_vmas;
- bool has_svm_vmas;
bool has_bo_vmas;
- bool has_userptr_vmas;
+ bool has_svm_userptr_vmas;
};
static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
@@ -46,10 +45,8 @@ static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_r
if (xe_vma_bo(vma))
madvise_range->has_bo_vmas = true;
- else if (xe_vma_is_cpu_addr_mirror(vma))
- madvise_range->has_svm_vmas = true;
- else if (xe_vma_is_userptr(vma))
- madvise_range->has_userptr_vmas = true;
+ else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
+ madvise_range->has_svm_userptr_vmas = true;
if (madvise_range->num_vmas == max_vmas) {
max_vmas <<= 1;
@@ -127,8 +124,6 @@ static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
vmas[i]->attr.atomic_access = op->atomic.val;
}
- vmas[i]->attr.atomic_access = op->atomic.val;
-
bo = xe_vma_bo(vmas[i]);
if (!bo || bo->attr.atomic_access == op->atomic.val)
continue;
@@ -201,12 +196,12 @@ static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
if (xe_pt_zap_ptes(tile, vma)) {
tile_mask |= BIT(id);
- /*
- * WRITE_ONCE pairs with READ_ONCE
- * in xe_vm_has_valid_gpu_mapping()
- */
- WRITE_ONCE(vma->tile_invalidated,
- vma->tile_invalidated | BIT(id));
+ /*
+ * WRITE_ONCE pairs with READ_ONCE
+ * in xe_vm_has_valid_gpu_mapping()
+ */
+ WRITE_ONCE(vma->tile_invalidated,
+ vma->tile_invalidated | BIT(id));
}
}
}
@@ -256,7 +251,7 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv
if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
return false;
- if (XE_IOCTL_DBG(xe, args->atomic.reserved))
+ if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
return false;
break;
}
@@ -409,29 +404,20 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
}
}
- if (madvise_range.has_userptr_vmas) {
- err = down_read_interruptible(&vm->userptr.notifier_lock);
+ if (madvise_range.has_svm_userptr_vmas) {
+ err = xe_svm_notifier_lock_interruptible(vm);
if (err)
goto err_fini;
}
- if (madvise_range.has_svm_vmas) {
- err = down_read_interruptible(&vm->svm.gpusvm.notifier_lock);
- if (err)
- goto unlock_userptr;
- }
-
attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);
err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
- if (madvise_range.has_svm_vmas)
+ if (madvise_range.has_svm_userptr_vmas)
xe_svm_notifier_unlock(vm);
-unlock_userptr:
- if (madvise_range.has_userptr_vmas)
- up_read(&vm->userptr.notifier_lock);
err_fini:
if (madvise_range.has_bo_vmas)
drm_exec_fini(&exec);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index e1a786db5f89..da39940501d8 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -17,6 +17,7 @@
#include "xe_device_types.h"
#include "xe_pt_types.h"
#include "xe_range_fence.h"
+#include "xe_userptr.h"
struct xe_bo;
struct xe_svm_range;
@@ -46,37 +47,6 @@ struct xe_vm_pgtable_update_op;
#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8)
#define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9)
-/** struct xe_userptr - User pointer */
-struct xe_userptr {
- /** @invalidate_link: Link for the vm::userptr.invalidated list */
- struct list_head invalidate_link;
- /** @userptr: link into VM repin list if userptr. */
- struct list_head repin_link;
- /**
- * @notifier: MMU notifier for user pointer (invalidation call back)
- */
- struct mmu_interval_notifier notifier;
- /** @sgt: storage for a scatter gather table */
- struct sg_table sgt;
- /** @sg: allocated scatter gather table */
- struct sg_table *sg;
- /** @notifier_seq: notifier sequence number */
- unsigned long notifier_seq;
- /** @unmap_mutex: Mutex protecting dma-unmapping */
- struct mutex unmap_mutex;
- /**
- * @initial_bind: user pointer has been bound at least once.
- * write: vm->userptr.notifier_lock in read mode and vm->resv held.
- * read: vm->userptr.notifier_lock in write mode or vm->resv held.
- */
- bool initial_bind;
- /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */
- bool mapped;
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
- u32 divisor;
-#endif
-};
-
/**
* struct xe_vma_mem_attr - memory attributes associated with vma
*/
@@ -140,10 +110,10 @@ struct xe_vma {
/**
* @tile_invalidated: Tile mask of binding are invalidated for this VMA.
- * protected by BO's resv and for userptrs, vm->userptr.notifier_lock in
- * write mode for writing or vm->userptr.notifier_lock in read mode and
+ * protected by BO's resv and for userptrs, vm->svm.gpusvm.notifier_lock in
+ * write mode for writing or vm->svm.gpusvm.notifier_lock in read mode and
* the vm->resv. For stable reading, BO's resv or userptr
- * vm->userptr.notifier_lock in read mode is required. Can be
+ * vm->svm.gpusvm.notifier_lock in read mode is required. Can be
* opportunistically read with READ_ONCE outside of locks.
*/
u8 tile_invalidated;
@@ -154,7 +124,7 @@ struct xe_vma {
/**
* @tile_present: Tile mask of binding are present for this VMA.
* protected by vm->lock, vm->resv and for userptrs,
- * vm->userptr.notifier_lock for writing. Needs either for reading,
+ * vm->svm.gpusvm.notifier_lock for writing. Needs either for reading,
* but if reading is done under the vm->lock only, it needs to be held
* in write mode.
*/
@@ -289,33 +259,7 @@ struct xe_vm {
const struct xe_pt_ops *pt_ops;
/** @userptr: user pointer state */
- struct {
- /**
- * @userptr.repin_list: list of VMAs which are user pointers,
- * and needs repinning. Protected by @lock.
- */
- struct list_head repin_list;
- /**
- * @notifier_lock: protects notifier in write mode and
- * submission in read mode.
- */
- struct rw_semaphore notifier_lock;
- /**
- * @userptr.invalidated_lock: Protects the
- * @userptr.invalidated list.
- */
- spinlock_t invalidated_lock;
- /**
- * @userptr.invalidated: List of invalidated userptrs, not yet
- * picked
- * up for revalidation. Protected from access with the
- * @invalidated_lock. Removing items from the list
- * additionally requires @lock in write mode, and adding
- * items to the list requires either the @userptr.notifier_lock in
- * write mode, OR @lock in write mode.
- */
- struct list_head invalidated;
- } userptr;
+ struct xe_userptr_vm userptr;
/** @preempt: preempt state */
struct {
@@ -363,18 +307,34 @@ struct xe_vm {
} error_capture;
/**
+ * @validation: Validation data only valid with the vm resv held.
+ * Note: This is really task state of the task holding the vm resv,
+ * and moving forward we should
+ * come up with a better way of passing this down the call-
+ * chain.
+ */
+ struct {
+ /**
+ * @validation.validating: The task that is currently making bos resident
+ * for this vm.
+ * Protected by the VM's resv for writing. Opportunistic reading can be done
+ * using READ_ONCE. Note: This is a workaround for the
+ * TTM eviction_valuable() callback not being passed a struct
+ * ttm_operation_context(). Future work might want to address this.
+ */
+ struct task_struct *validating;
+ /**
+ * @validation._exec: The drm_exec context used when locking the vm resv.
+ * Protected by the vm's resv.
+ */
+ struct drm_exec *_exec;
+ } validation;
+
+ /**
* @tlb_flush_seqno: Required TLB flush seqno for the next exec.
* protected by the vm resv.
*/
u64 tlb_flush_seqno;
- /**
- * @validating: The task that is currently making bos resident for this vm.
- * Protected by the VM's resv for writing. Opportunistic reading can be done
- * using READ_ONCE. Note: This is a workaround for the
- * TTM eviction_valuable() callback not being passed a struct
- * ttm_operation_context(). Future work might want to address this.
- */
- struct task_struct *validating;
/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
bool batch_invalidate_tlb;
/** @xef: XE file handle for tracking this VM's drm client */
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 52c7df4c3afd..cd03891654a1 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -39,7 +39,8 @@
* Register Immediate commands) once when initializing the device and saved in
* the default context. That default context is then used on every context
* creation to have a "primed golden context", i.e. a context image that
- * already contains the changes needed to all the registers.
+ * already contains the changes needed to all the registers. See
+ * drivers/gpu/drm/xe/xe_lrc.c for default context handling.
*
* - Engine workarounds: the list of these WAs is applied whenever the specific
* engine is reset. It's also possible that a set of engine classes share a
@@ -48,10 +49,10 @@
* them need to keeep the workaround programming: the approach taken in the
* driver is to tie those workarounds to the first compute/render engine that
* is registered. When executing with GuC submission, engine resets are
- * outside of kernel driver control, hence the list of registers involved in
+ * outside of kernel driver control, hence the list of registers involved is
* written once, on engine initialization, and then passed to GuC, that
* saves/restores their values before/after the reset takes place. See
- * ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ * drivers/gpu/drm/xe/xe_guc_ads.c for reference.
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -66,21 +67,39 @@
* hardware on every HW context restore. These buffers are created and
* programmed in the default context so the hardware always go through those
* programming sequences when switching contexts. The support for workaround
- * batchbuffers is enabled these hardware mechanisms:
+ * batchbuffers is enabled via these hardware mechanisms:
*
- * #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
- * context, pointing the hardware to jump to that location when that offset
- * is reached in the context restore. Workaround batchbuffer in the driver
- * currently uses this mechanism for all platforms.
+ * #. INDIRECT_CTX (also known as **mid context restore bb**): A batchbuffer
+ * and an offset are provided in the default context, pointing the hardware
+ * to jump to that location when that offset is reached in the context
+ * restore. When a context is being restored, this is executed after the
+ * ring context, in the middle (or beginning) of the engine context image.
*
- * #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
- * pointing the hardware to a buffer to continue executing after the
- * engine registers are restored in a context restore sequence. This is
- * currently not used in the driver.
+ * #. BB_PER_CTX_PTR (also known as **post context restore bb**): A
+ * batchbuffer is provided in the default context, pointing the hardware to
+ * a buffer to continue executing after the engine registers are restored
+ * in a context restore sequence.
+ *
+ * Below is the timeline for a context restore sequence:
+ *
+ * .. code::
+ *
+ * INDIRECT_CTX_OFFSET
+ * |----------->|
+ * .------------.------------.-------------.------------.--------------.-----------.
+ * |Ring | Engine | Mid-context | Engine | Post-context | Ring |
+ * |Restore | Restore (1)| BB Restore | Restore (2)| BB Restore | Execution |
+ * `------------'------------'-------------'------------'--------------'-----------'
*
* - Other/OOB: There are WAs that, due to their nature, cannot be applied from
* a central place. Those are peppered around the rest of the code, as needed.
- * Workarounds related to the display IP are the main example.
+ * There's a central place to control which workarounds are enabled:
+ * drivers/gpu/drm/xe/xe_wa_oob.rules for GT workarounds and
+ * drivers/gpu/drm/xe/xe_device_wa_oob.rules for device/SoC workarounds.
+ * These files only record which workarounds are enabled: during early device
+ * initialization those rules are evaluated and recorded by the driver. Then
+ * later the driver checks with ``XE_GT_WA()`` and ``XE_DEVICE_WA()`` to
+ * implement them.
*
* .. [1] Technically, some registers are powercontext saved & restored, so they
* survive a suspend/resume. In practice, writing them again is not too
@@ -612,6 +631,13 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
},
+ { XE_RTP_NAME("18041344222"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute),
+ FUNC(xe_rtp_match_not_sriov_vf),
+ FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)),
+ XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE))
+ },
/* Xe2_LPM */
@@ -672,6 +698,13 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
+ { XE_RTP_NAME("18041344222"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+ FUNC(xe_rtp_match_first_render_or_compute),
+ FUNC(xe_rtp_match_not_sriov_vf),
+ FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)),
+ XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE))
+ },
};
static const struct xe_rtp_entry_sr lrc_was[] = {
@@ -879,6 +912,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
DIS_PARTIAL_AUTOSTRIP |
DIS_AUTOSTRIP))
},
+ { XE_RTP_NAME("22021007897"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3003), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
+ },
};
static __maybe_unused const struct xe_rtp_entry oob_was[] = {
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 338c344dcd7d..f3a6d5d239ce 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -49,7 +49,6 @@
16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
14019789679 GRAPHICS_VERSION(1255)
GRAPHICS_VERSION_RANGE(1270, 2004)
-no_media_l3 MEDIA_VERSION_RANGE(3000, 3002)
14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)
MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
16021333562 GRAPHICS_VERSION_RANGE(1200, 1274)
diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig
index 7575fee96cc6..f8b04e49e4ba 100644
--- a/drivers/misc/mei/Kconfig
+++ b/drivers/misc/mei/Kconfig
@@ -81,6 +81,19 @@ config INTEL_MEI_VSC
This driver can also be built as a module. If so, the module
will be called mei-vsc.
+config INTEL_MEI_LB
+ tristate "Intel Late Binding (LB) support on ME Interface"
+ depends on INTEL_MEI_ME
+ depends on DRM_XE
+ help
+ Enable support for Intel Late Binding (LB) via the MEI interface.
+
+ Late Binding is a method for applying firmware updates at runtime,
+ allowing the Intel Xe driver to load firmware payloads such as
+ fan controller or voltage regulator. These firmware updates are
+ authenticated and versioned, and do not require firmware flashing
+ or system reboot.
+
source "drivers/misc/mei/hdcp/Kconfig"
source "drivers/misc/mei/pxp/Kconfig"
source "drivers/misc/mei/gsc_proxy/Kconfig"
diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile
index 6f9fdbf1a495..a203ed766b33 100644
--- a/drivers/misc/mei/Makefile
+++ b/drivers/misc/mei/Makefile
@@ -31,6 +31,7 @@ CFLAGS_mei-trace.o = -I$(src)
obj-$(CONFIG_INTEL_MEI_HDCP) += hdcp/
obj-$(CONFIG_INTEL_MEI_PXP) += pxp/
obj-$(CONFIG_INTEL_MEI_GSC_PROXY) += gsc_proxy/
+obj-$(CONFIG_INTEL_MEI_LB) += mei_lb.o
obj-$(CONFIG_INTEL_MEI_VSC_HW) += mei-vsc-hw.o
mei-vsc-hw-y := vsc-tp.o
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 5cc3ad07d5be..09aae8f9d225 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -615,6 +615,19 @@ u8 mei_cldev_ver(const struct mei_cl_device *cldev)
EXPORT_SYMBOL_GPL(mei_cldev_ver);
/**
+ * mei_cldev_mtu - max message that client can send and receive
+ *
+ * @cldev: mei client device
+ *
+ * Return: mtu or 0 if client is not connected
+ */
+size_t mei_cldev_mtu(const struct mei_cl_device *cldev)
+{
+ return mei_cl_mtu(cldev->cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_mtu);
+
+/**
* mei_cldev_enabled - check whether the device is enabled
*
* @cldev: mei client device
diff --git a/drivers/misc/mei/mei_lb.c b/drivers/misc/mei/mei_lb.c
new file mode 100644
index 000000000000..77686b108d3c
--- /dev/null
+++ b/drivers/misc/mei/mei_lb.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Intel Corporation
+ */
+
+#include <linux/component.h>
+#include <linux/mei_cl_bus.h>
+#include <linux/module.h>
+#include <linux/overflow.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+
+#include <drm/intel/i915_component.h>
+#include <drm/intel/intel_lb_mei_interface.h>
+
+#include "mkhi.h"
+
+/**
+ * DOC: Late Binding Firmware Update/Upload
+ *
+ * Late Binding is a firmware update/upload mechanism that allows configuration
+ * payloads to be securely delivered and applied at runtime, rather than
+ * being embedded in the system firmware image (e.g., IFWI or SPI flash).
+ *
+ * This mechanism is used to update device-level configuration such as:
+ * - Fan controller
+ * - Voltage regulator (VR)
+ *
+ * Key Characteristics:
+ * ---------------------
+ * - Runtime Delivery:
+ * Firmware blobs are loaded by the host driver (e.g., Xe KMD)
+ * after the GPU or SoC has booted.
+ *
+ * - Secure and Authenticated:
+ * All payloads are signed and verified by the authentication firmware.
+ *
+ * - No Firmware Flashing Required:
+ * Updates are applied in volatile memory and do not require SPI flash
+ * modification or system reboot.
+ *
+ * - Re-entrant:
+ * Multiple updates of the same or different types can be applied
+ * sequentially within a single boot session.
+ *
+ * - Version Controlled:
+ * Each payload includes version and security version number (SVN)
+ * metadata to support anti-rollback enforcement.
+ *
+ * Upload Flow:
+ * ------------
+ * 1. Host driver (KMD or user-space tool) loads the late binding firmware.
+ * 2. Firmware is passed to the MEI interface and forwarded to
+ * authentication firmware.
+ * 3. Authentication firmware authenticates the payload and extracts
+ * command and data arrays.
+ * 4. Authentication firmware delivers the configuration to PUnit/PCODE.
+ * 5. Status is returned back to the host via MEI.
+ */
+
+#define INTEL_LB_CMD 0x12
+#define INTEL_LB_RSP (INTEL_LB_CMD | 0x80)
+
+#define INTEL_LB_SEND_TIMEOUT_MSEC 3000
+#define INTEL_LB_RECV_TIMEOUT_MSEC 3000
+
+/**
+ * struct mei_lb_req - Late Binding request structure
+ * @header: MKHI message header (see struct mkhi_msg_hdr)
+ * @type: Type of the Late Binding payload
+ * @flags: Flags to be passed to the authentication firmware (e.g. %INTEL_LB_FLAGS_IS_PERSISTENT)
+ * @reserved: Reserved for future use by authentication firmware, must be set to 0
+ * @payload_size: Size of the payload data in bytes
+ * @payload: Payload data to be sent to the authentication firmware
+ */
+struct mei_lb_req {
+ struct mkhi_msg_hdr header;
+ __le32 type;
+ __le32 flags;
+ __le32 reserved[2];
+ __le32 payload_size;
+ u8 payload[] __counted_by(payload_size);
+} __packed;
+
+/**
+ * struct mei_lb_rsp - Late Binding response structure
+ * @header: MKHI message header (see struct mkhi_msg_hdr)
+ * @type: Type of the Late Binding payload
+ * @reserved: Reserved for future use by authentication firmware, must be set to 0
+ * @status: Status returned by authentication firmware (see &enum intel_lb_status)
+ */
+struct mei_lb_rsp {
+ struct mkhi_msg_hdr header;
+ __le32 type;
+ __le32 reserved[2];
+ __le32 status;
+} __packed;
+
+static bool mei_lb_check_response(const struct device *dev, ssize_t bytes,
+ struct mei_lb_rsp *rsp)
+{
+ /*
+ * Received message size may be smaller than the full message size when
+ * reply contains only MKHI header with result field set to the error code.
+ * Check the header size and content first to output exact error, if needed,
+ * and then proceed to the whole message.
+ */
+ if (bytes < sizeof(rsp->header)) {
+ dev_err(dev, "Received less than header size from the firmware: %zd < %zu\n",
+ bytes, sizeof(rsp->header));
+ return false;
+ }
+ if (rsp->header.group_id != MKHI_GROUP_ID_GFX) {
+ dev_err(dev, "Mismatch group id: 0x%x instead of 0x%x\n",
+ rsp->header.group_id, MKHI_GROUP_ID_GFX);
+ return false;
+ }
+ if (rsp->header.command != INTEL_LB_RSP) {
+ dev_err(dev, "Mismatch command: 0x%x instead of 0x%x\n",
+ rsp->header.command, INTEL_LB_RSP);
+ return false;
+ }
+ if (rsp->header.result) {
+ dev_err(dev, "Error in result: 0x%x\n", rsp->header.result);
+ return false;
+ }
+ if (bytes < sizeof(*rsp)) {
+ dev_err(dev, "Received less than message size from the firmware: %zd < %zu\n",
+ bytes, sizeof(*rsp));
+ return false;
+ }
+
+ return true;
+}
+
+static int mei_lb_push_payload(struct device *dev,
+ enum intel_lb_type type, u32 flags,
+ const void *payload, size_t payload_size)
+{
+ struct mei_cl_device *cldev;
+ struct mei_lb_req *req = NULL;
+ struct mei_lb_rsp rsp;
+ size_t req_size;
+ ssize_t bytes;
+ int ret;
+
+ cldev = to_mei_cl_device(dev);
+
+ ret = mei_cldev_enable(cldev);
+ if (ret) {
+ dev_dbg(dev, "Failed to enable firmware client. %d\n", ret);
+ return ret;
+ }
+
+ req_size = struct_size(req, payload, payload_size);
+ if (req_size > mei_cldev_mtu(cldev)) {
+ dev_err(dev, "Payload is too big: %zu\n", payload_size);
+ ret = -EMSGSIZE;
+ goto end;
+ }
+
+ req = kmalloc(req_size, GFP_KERNEL);
+ if (!req) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ req->header.group_id = MKHI_GROUP_ID_GFX;
+ req->header.command = INTEL_LB_CMD;
+ req->type = cpu_to_le32(type);
+ req->flags = cpu_to_le32(flags);
+ req->reserved[0] = 0;
+ req->reserved[1] = 0;
+ req->payload_size = cpu_to_le32(payload_size);
+ memcpy(req->payload, payload, payload_size);
+
+ bytes = mei_cldev_send_timeout(cldev, (u8 *)req, req_size,
+ INTEL_LB_SEND_TIMEOUT_MSEC);
+ if (bytes < 0) {
+ dev_err(dev, "Failed to send late binding request to firmware. %zd\n", bytes);
+ ret = bytes;
+ goto end;
+ }
+
+ bytes = mei_cldev_recv_timeout(cldev, (u8 *)&rsp, sizeof(rsp),
+ INTEL_LB_RECV_TIMEOUT_MSEC);
+ if (bytes < 0) {
+ dev_err(dev, "Failed to receive late binding reply from MEI firmware. %zd\n",
+ bytes);
+ ret = bytes;
+ goto end;
+ }
+ if (!mei_lb_check_response(dev, bytes, &rsp)) {
+ dev_err(dev, "Bad response from the firmware. header: %02x %02x %02x %02x\n",
+ rsp.header.group_id, rsp.header.command,
+ rsp.header.reserved, rsp.header.result);
+ ret = -EPROTO;
+ goto end;
+ }
+
+ dev_dbg(dev, "status = %u\n", le32_to_cpu(rsp.status));
+ ret = (int)le32_to_cpu(rsp.status);
+end:
+ mei_cldev_disable(cldev);
+ kfree(req);
+ return ret;
+}
+
+static const struct intel_lb_component_ops mei_lb_ops = {
+ .push_payload = mei_lb_push_payload,
+};
+
+static int mei_lb_component_master_bind(struct device *dev)
+{
+ return component_bind_all(dev, (void *)&mei_lb_ops);
+}
+
+static void mei_lb_component_master_unbind(struct device *dev)
+{
+ component_unbind_all(dev, (void *)&mei_lb_ops);
+}
+
+static const struct component_master_ops mei_lb_component_master_ops = {
+ .bind = mei_lb_component_master_bind,
+ .unbind = mei_lb_component_master_unbind,
+};
+
+static int mei_lb_component_match(struct device *dev, int subcomponent,
+ void *data)
+{
+ /*
+ * This function checks if requester is Intel %PCI_CLASS_DISPLAY_VGA or
+ * %PCI_CLASS_DISPLAY_OTHER device, and checks if the requester is the
+ * grand parent of mei_if i.e. late bind MEI device
+ */
+ struct device *base = data;
+ struct pci_dev *pdev;
+
+ if (!dev)
+ return 0;
+
+ if (!dev_is_pci(dev))
+ return 0;
+
+ pdev = to_pci_dev(dev);
+
+ if (pdev->vendor != PCI_VENDOR_ID_INTEL)
+ return 0;
+
+ if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8) &&
+ pdev->class != (PCI_CLASS_DISPLAY_OTHER << 8))
+ return 0;
+
+ if (subcomponent != INTEL_COMPONENT_LB)
+ return 0;
+
+ base = base->parent;
+ if (!base) /* mei device */
+ return 0;
+
+ base = base->parent; /* pci device */
+
+ return !!base && dev == base;
+}
+
+static int mei_lb_probe(struct mei_cl_device *cldev,
+ const struct mei_cl_device_id *id)
+{
+ struct component_match *master_match = NULL;
+ int ret;
+
+ component_match_add_typed(&cldev->dev, &master_match,
+ mei_lb_component_match, &cldev->dev);
+ if (IS_ERR_OR_NULL(master_match))
+ return -ENOMEM;
+
+ ret = component_master_add_with_match(&cldev->dev,
+ &mei_lb_component_master_ops,
+ master_match);
+ if (ret < 0)
+ dev_err(&cldev->dev, "Failed to add late binding master component. %d\n", ret);
+
+ return ret;
+}
+
+static void mei_lb_remove(struct mei_cl_device *cldev)
+{
+ component_master_del(&cldev->dev, &mei_lb_component_master_ops);
+}
+
+#define MEI_GUID_MKHI UUID_LE(0xe2c2afa2, 0x3817, 0x4d19, \
+ 0x9d, 0x95, 0x6, 0xb1, 0x6b, 0x58, 0x8a, 0x5d)
+
+static const struct mei_cl_device_id mei_lb_tbl[] = {
+ { .uuid = MEI_GUID_MKHI, .version = MEI_CL_VERSION_ANY },
+ { }
+};
+MODULE_DEVICE_TABLE(mei, mei_lb_tbl);
+
+static struct mei_cl_driver mei_lb_driver = {
+ .id_table = mei_lb_tbl,
+ .name = "mei_lb",
+ .probe = mei_lb_probe,
+ .remove = mei_lb_remove,
+};
+
+module_mei_cl_driver(mei_lb_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MEI Late Binding Firmware Update/Upload");
diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h
index 0e336148309d..5434048a2ca4 100644
--- a/include/drm/drm_gpusvm.h
+++ b/include/drm/drm_gpusvm.h
@@ -106,16 +106,16 @@ struct drm_gpusvm_notifier {
};
/**
- * struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags
+ * struct drm_gpusvm_pages_flags - Structure representing GPU SVM pages flags
*
- * @migrate_devmem: Flag indicating whether the range can be migrated to device memory
- * @unmapped: Flag indicating if the range has been unmapped
- * @partial_unmap: Flag indicating if the range has been partially unmapped
- * @has_devmem_pages: Flag indicating if the range has devmem pages
- * @has_dma_mapping: Flag indicating if the range has a DMA mapping
- * @__flags: Flags for range in u16 form (used for READ_ONCE)
+ * @migrate_devmem: Flag indicating whether the pages can be migrated to device memory
+ * @unmapped: Flag indicating if the pages have been unmapped
+ * @partial_unmap: Flag indicating if the pages have been partially unmapped
+ * @has_devmem_pages: Flag indicating if the pages have devmem pages
+ * @has_dma_mapping: Flag indicating if the pages have a DMA mapping
+ * @__flags: Flags for pages in u16 form (used for READ_ONCE)
*/
-struct drm_gpusvm_range_flags {
+struct drm_gpusvm_pages_flags {
union {
struct {
/* All flags below must be set upon creation */
@@ -131,6 +131,27 @@ struct drm_gpusvm_range_flags {
};
/**
+ * struct drm_gpusvm_pages - Structure representing GPU SVM mapped pages
+ *
+ * @dma_addr: Device address array
+ * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping.
+ * Note this is assuming only one drm_pagemap per range is allowed.
+ * @notifier_seq: Notifier sequence number of the range's pages
+ * @flags: Flags for the pages
+ * @flags.migrate_devmem: Flag indicating whether the pages can be migrated to device memory
+ * @flags.unmapped: Flag indicating if the pages have been unmapped
+ * @flags.partial_unmap: Flag indicating if the pages have been partially unmapped
+ * @flags.has_devmem_pages: Flag indicating if the pages have devmem pages
+ * @flags.has_dma_mapping: Flag indicating if the pages have a DMA mapping
+ */
+struct drm_gpusvm_pages {
+ struct drm_pagemap_addr *dma_addr;
+ struct drm_pagemap *dpagemap;
+ unsigned long notifier_seq;
+ struct drm_gpusvm_pages_flags flags;
+};
+
+/**
* struct drm_gpusvm_range - Structure representing a GPU SVM range
*
* @gpusvm: Pointer to the GPU SVM structure
@@ -138,11 +159,7 @@ struct drm_gpusvm_range_flags {
* @refcount: Reference count for the range
* @itree: Interval tree node for the range (inserted in GPU SVM notifier)
* @entry: List entry to fast interval tree traversal
- * @notifier_seq: Notifier sequence number of the range's pages
- * @dma_addr: Device address array
- * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping.
- * Note this is assuming only one drm_pagemap per range is allowed.
- * @flags: Flags for range
+ * @pages: The pages for this range.
*
* This structure represents a GPU SVM range used for tracking memory ranges
* mapped in a DRM device.
@@ -153,10 +170,7 @@ struct drm_gpusvm_range {
struct kref refcount;
struct interval_tree_node itree;
struct list_head entry;
- unsigned long notifier_seq;
- struct drm_pagemap_addr *dma_addr;
- struct drm_pagemap *dpagemap;
- struct drm_gpusvm_range_flags flags;
+ struct drm_gpusvm_pages pages;
};
/**
@@ -293,6 +307,22 @@ drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
const struct mmu_notifier_range *mmu_range);
+int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ struct mm_struct *mm,
+ struct mmu_interval_notifier *notifier,
+ unsigned long pages_start, unsigned long pages_end,
+ const struct drm_gpusvm_ctx *ctx);
+
+void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ unsigned long npages,
+ const struct drm_gpusvm_ctx *ctx);
+
+void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ unsigned long npages);
+
#ifdef CONFIG_LOCKDEP
/**
* drm_gpusvm_driver_set_lock() - Set the lock protecting accesses to GPU SVM
diff --git a/include/drm/intel/i915_component.h b/include/drm/intel/i915_component.h
index 4ea3b17aa143..8082db222e00 100644
--- a/include/drm/intel/i915_component.h
+++ b/include/drm/intel/i915_component.h
@@ -31,6 +31,7 @@ enum i915_component_type {
I915_COMPONENT_HDCP,
I915_COMPONENT_PXP,
I915_COMPONENT_GSC_PROXY,
+ INTEL_COMPONENT_LB,
};
/* MAX_PORT is the number of port
diff --git a/include/drm/intel/intel_lb_mei_interface.h b/include/drm/intel/intel_lb_mei_interface.h
new file mode 100644
index 000000000000..d65be2cba2ab
--- /dev/null
+++ b/include/drm/intel/intel_lb_mei_interface.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (c) 2025 Intel Corporation
+ */
+
+#ifndef _INTEL_LB_MEI_INTERFACE_H_
+#define _INTEL_LB_MEI_INTERFACE_H_
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+struct device;
+
+/**
+ * define INTEL_LB_FLAG_IS_PERSISTENT - Mark the payload as persistent
+ *
+ * This flag indicates that the late binding payload should be stored
+ * persistently in flash across warm resets.
+ *
+ * Bit 0 of the @flags bitmap passed to &intel_lb_component_ops.push_payload.
+ * BIT() is provided by <linux/bits.h>, which this header includes so that
+ * it stays self-contained.
+ */
+#define INTEL_LB_FLAG_IS_PERSISTENT BIT(0)
+
+/**
+ * enum intel_lb_type - enum to determine late binding payload type
+ * @INTEL_LB_TYPE_FAN_CONTROL: Fan controller configuration
+ *
+ * Referenced by the @type argument of &intel_lb_component_ops.push_payload.
+ */
+enum intel_lb_type {
+ INTEL_LB_TYPE_FAN_CONTROL = 1,
+};
+
+/**
+ * enum intel_lb_status - Status codes returned on late binding transmissions
+ * @INTEL_LB_STATUS_SUCCESS: Operation completed successfully
+ * @INTEL_LB_STATUS_4ID_MISMATCH: Mismatch in the expected 4ID (firmware identity/token)
+ * @INTEL_LB_STATUS_ARB_FAILURE: Arbitration failure (e.g. conflicting access or state)
+ * @INTEL_LB_STATUS_GENERAL_ERROR: General firmware error not covered by other codes
+ * @INTEL_LB_STATUS_INVALID_PARAMS: One or more input parameters are invalid
+ * @INTEL_LB_STATUS_INVALID_SIGNATURE: Payload has an invalid or untrusted signature
+ * @INTEL_LB_STATUS_INVALID_PAYLOAD: Payload contents are not accepted by firmware
+ * @INTEL_LB_STATUS_TIMEOUT: Operation timed out before completion
+ *
+ * Success is 0 and every failure code is a positive value.
+ * NOTE(review): presumably these are the positive firmware status values
+ * that &intel_lb_component_ops.push_payload may return - confirm.
+ */
+enum intel_lb_status {
+ INTEL_LB_STATUS_SUCCESS = 0,
+ INTEL_LB_STATUS_4ID_MISMATCH = 1,
+ INTEL_LB_STATUS_ARB_FAILURE = 2,
+ INTEL_LB_STATUS_GENERAL_ERROR = 3,
+ INTEL_LB_STATUS_INVALID_PARAMS = 4,
+ INTEL_LB_STATUS_INVALID_SIGNATURE = 5,
+ INTEL_LB_STATUS_INVALID_PAYLOAD = 6,
+ INTEL_LB_STATUS_TIMEOUT = 7,
+};
+
+/**
+ * struct intel_lb_component_ops - Ops for late binding services
+ */
+struct intel_lb_component_ops {
+ /**
+ * push_payload - Sends a payload to the authentication firmware
+ * @dev: Device struct corresponding to the mei device
+ * @type: Payload type (see &enum intel_lb_type)
+ * @flags: Payload flags bitmap (e.g. %INTEL_LB_FLAG_IS_PERSISTENT)
+ * @payload: Pointer to payload buffer
+ * @payload_size: Payload buffer size in bytes
+ *
+ * Return: 0 on success, negative errno value on transport failure,
+ * positive status returned by firmware
+ */
+ int (*push_payload)(struct device *dev, u32 type, u32 flags,
+ const void *payload, size_t payload_size);
+};
+
+#endif /* _INTEL_LB_MEI_INTERFACE_H_ */
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index 725fd7727422..a82755e1fc40 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -113,6 +113,7 @@ int mei_cldev_register_notif_cb(struct mei_cl_device *cldev,
mei_cldev_cb_t notif_cb);
u8 mei_cldev_ver(const struct mei_cl_device *cldev);
+size_t mei_cldev_mtu(const struct mei_cl_device *cldev);
void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev);
void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data);