summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnkit Nautiyal <ankit.k.nautiyal@intel.com>2025-09-25 18:48:45 +0530
committerAnkit Nautiyal <ankit.k.nautiyal@intel.com>2025-09-25 18:48:45 +0530
commitfe1a9c56ed67912cb5cf3dbdb667a4e6e16b0ba4 (patch)
treeb11fffcda8f20602c9e923cebbb71fc82c4aa1a2
parent554a2000d0691b337c8fce3d25fc4066f0a2488e (diff)
parenta4916b4da44812384388ac09a2baf9da461dbc30 (diff)
Merge remote-tracking branch 'drm-xe/drm-xe-next' into drm-tip
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf.c17
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci.c12
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.c9
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.c23
-rw-r--r--drivers/gpu/drm/xe/xe_device.c65
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c41
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c3
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c129
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.h1
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c267
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c44
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c66
-rw-r--r--drivers/gpu/drm/xe/xe_pm.h2
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.c4
-rw-r--r--drivers/gpu/drm/xe/xe_query.c15
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.c7
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c2
17 files changed, 371 insertions, 336 deletions
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index a7e548a2bdfb..5df98de5ba3c 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -31,6 +31,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
struct drm_exec *exec)
{
struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+ struct dma_buf_attachment *attach;
u32 mem_type;
int ret;
@@ -46,7 +47,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
mem_type = XE_PL_TT;
else if (params->force_different_devices && !is_dynamic(params) &&
(params->mem_mask & XE_BO_FLAG_SYSTEM))
- /* Pin migrated to TT */
+ /* Pin migrated to TT on non-dynamic attachments. */
mem_type = XE_PL_TT;
if (!xe_bo_is_mem_type(exported, mem_type)) {
@@ -88,6 +89,18 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+ /* Check that we can pin without migrating. */
+ attach = list_first_entry_or_null(&dmabuf->attachments, typeof(*attach), node);
+ if (attach) {
+ int err = dma_buf_pin(attach);
+
+ if (!err) {
+ KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+ dma_buf_unpin(attach);
+ }
+ KUNIT_EXPECT_EQ(test, err, 0);
+ }
+
if (params->force_different_devices)
KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT));
else
@@ -150,7 +163,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
xe_bo_lock(import_bo, false);
err = xe_bo_validate(import_bo, NULL, false, exec);
- /* Pinning in VRAM is not allowed. */
+ /* Pinning in VRAM is not allowed for non-dynamic attachments */
if (!is_dynamic(params) &&
params->force_different_devices &&
!(params->mem_mask & XE_BO_FLAG_SYSTEM))
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index aa29ac759d5d..0f136bc85b76 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -211,15 +211,15 @@ static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc)
* param generator can be used for both
*/
static const struct xe_ip pre_gmdid_graphics_ips[] = {
- graphics_ip_xelp,
- graphics_ip_xelpp,
- graphics_ip_xehpg,
- graphics_ip_xehpc,
+ { 1200, "Xe_LP", &graphics_xelp },
+ { 1210, "Xe_LP+", &graphics_xelp },
+ { 1255, "Xe_HPG", &graphics_xehpg },
+ { 1260, "Xe_HPC", &graphics_xehpc },
};
static const struct xe_ip pre_gmdid_media_ips[] = {
- media_ip_xem,
- media_ip_xehpm,
+ { 1200, "Xe_M", &media_xem },
+ { 1255, "Xe_HPM", &media_xem },
};
KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc);
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index d5dbc51e8612..1a12675b2ea9 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -73,6 +73,11 @@ int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe)
&xe->pinned.late.kernel_bo_present,
xe_bo_notifier_prepare_pinned);
+ if (!ret)
+ ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
+ &xe->pinned.late.external,
+ xe_bo_notifier_prepare_pinned);
+
return ret;
}
@@ -93,6 +98,10 @@ void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe)
(void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
&xe->pinned.late.kernel_bo_present,
xe_bo_notifier_unprepare_pinned);
+
+ (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
+ &xe->pinned.late.external,
+ xe_bo_notifier_unprepare_pinned);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
index 8a9b950e7a6d..139663423185 100644
--- a/drivers/gpu/drm/xe/xe_configfs.c
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -126,8 +126,20 @@
* not intended for normal execution and will taint the kernel with TAINT_TEST
* when used.
*
- * Currently this is implemented only for post and mid context restore.
- * Examples:
+ * The syntax allows passing raw instructions to be executed by the engine
+ * in a batch buffer, or setting specific registers.
+ *
+ * #. Generic instruction::
+ *
+ * <engine-class> cmd <instr> [[dword0] [dword1] [...]]
+ *
+ * #. Simple register setting::
+ *
+ * <engine-class> reg <address> <value>
+ *
+ * Commands are saved per engine class: all instances of that class will execute
+ * those commands during context switch. The instruction, dword arguments,
+ * addresses and values are in hex format like in the examples below.
*
* #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 after the
* normal context restore::
@@ -154,7 +166,8 @@
* When using multiple lines, make sure to use a command that is
* implemented with a single write syscall, like HEREDOC.
*
- * These attributes can only be set before binding to the device.
+ * Currently this is implemented only for post and mid context restore and
+ * these attributes can only be set before binding to the device.
*
* Remove devices
* ==============
@@ -324,8 +337,8 @@ static const struct engine_info *lookup_engine_info(const char *pattern, u64 *ma
continue;
pattern += strlen(engine_info[i].cls);
- if (!mask && !*pattern)
- return &engine_info[i];
+ if (!mask)
+ return *pattern ? NULL : &engine_info[i];
if (!strcmp(pattern, "*")) {
*mask = engine_info[i].mask;
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index fdb7b7498920..09f8a66c9728 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -8,6 +8,7 @@
#include <linux/aperture.h>
#include <linux/delay.h>
#include <linux/fault-inject.h>
+#include <linux/iopoll.h>
#include <linux/units.h>
#include <drm/drm_atomic_helper.h>
@@ -630,16 +631,22 @@ mask_err:
return err;
}
-static bool verify_lmem_ready(struct xe_device *xe)
+static int lmem_initializing(struct xe_device *xe)
{
- u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;
+ if (xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT)
+ return 0;
+
+ if (signal_pending(current))
+ return -EINTR;
- return !!val;
+ return 1;
}
static int wait_for_lmem_ready(struct xe_device *xe)
{
- unsigned long timeout, start;
+ const unsigned long TIMEOUT_SEC = 60;
+ unsigned long prev_jiffies;
+ int initializing;
if (!IS_DGFX(xe))
return 0;
@@ -647,39 +654,35 @@ static int wait_for_lmem_ready(struct xe_device *xe)
if (IS_SRIOV_VF(xe))
return 0;
- if (verify_lmem_ready(xe))
+ if (!lmem_initializing(xe))
return 0;
drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
+ prev_jiffies = jiffies;
- start = jiffies;
- timeout = start + secs_to_jiffies(60); /* 60 sec! */
-
- do {
- if (signal_pending(current))
- return -EINTR;
-
- /*
- * The boot firmware initializes local memory and
- * assesses its health. If memory training fails,
- * the punit will have been instructed to keep the GT powered
- * down.we won't be able to communicate with it
- *
- * If the status check is done before punit updates the register,
- * it can lead to the system being unusable.
- * use a timeout and defer the probe to prevent this.
- */
- if (time_after(jiffies, timeout)) {
- drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
- return -EPROBE_DEFER;
- }
-
- msleep(20);
-
- } while (!verify_lmem_ready(xe));
+ /*
+ * The boot firmware initializes local memory and
+ * assesses its health. If memory training fails,
+ * the punit will have been instructed to keep the GT powered
+ * down, and we won't be able to communicate with it.
+ *
+ * If the status check is done before punit updates the register,
+ * it can lead to the system being unusable.
+ * Use a timeout and defer the probe to prevent this.
+ */
+ poll_timeout_us(initializing = lmem_initializing(xe),
+ initializing <= 0,
+ 20 * USEC_PER_MSEC, TIMEOUT_SEC * USEC_PER_SEC, true);
+ if (initializing < 0)
+ return initializing;
+
+ if (initializing) {
+ drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
+ return -EPROBE_DEFER;
+ }
drm_dbg(&xe->drm, "lmem ready after %ums",
- jiffies_to_msecs(jiffies - start));
+ jiffies_to_msecs(jiffies - prev_jiffies));
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index a7d67725c3ee..54e42960daad 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -48,32 +48,43 @@ static void xe_dma_buf_detach(struct dma_buf *dmabuf,
static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
{
- struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct dma_buf *dmabuf = attach->dmabuf;
+ struct drm_gem_object *obj = dmabuf->priv;
struct xe_bo *bo = gem_to_xe_bo(obj);
struct xe_device *xe = xe_bo_device(bo);
struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED;
+ bool allow_vram = true;
int ret;
- /*
- * For now only support pinning in TT memory, for two reasons:
- * 1) Avoid pinning in a placement not accessible to some importers.
- * 2) Pinning in VRAM requires PIN accounting which is a to-do.
- */
- if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT)) {
+ if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
+ allow_vram = false;
+ } else {
+ list_for_each_entry(attach, &dmabuf->attachments, node) {
+ if (!attach->peer2peer) {
+ allow_vram = false;
+ break;
+ }
+ }
+ }
+
+ if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT) &&
+ !(xe_bo_is_vram(bo) && allow_vram)) {
drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n");
return -EINVAL;
}
- ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
- if (ret) {
- if (ret != -EINTR && ret != -ERESTARTSYS)
- drm_dbg(&xe->drm,
- "Failed migrating dma-buf to TT memory: %pe\n",
- ERR_PTR(ret));
- return ret;
+ if (!allow_vram) {
+ ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
+ if (ret) {
+ if (ret != -EINTR && ret != -ERESTARTSYS)
+ drm_dbg(&xe->drm,
+ "Failed migrating dma-buf to TT memory: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
}
- ret = xe_bo_pin_external(bo, true, exec);
+ ret = xe_bo_pin_external(bo, !allow_vram, exec);
xe_assert(xe, !ret);
return 0;
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 7715e74bb945..83897950f0da 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -16,6 +16,7 @@
#include "xe_exec_queue.h"
#include "xe_hw_engine_group.h"
#include "xe_macros.h"
+#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
@@ -247,7 +248,7 @@ retry:
* on task freezing during suspend / hibernate, the call will
* return -ERESTARTSYS and the IOCTL will be rerun.
*/
- err = wait_for_completion_interruptible(&xe->pm_block);
+ err = xe_pm_block_on_suspend(xe);
if (err)
goto err_unlock_list;
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index f253e2df4907..b9176d4398e1 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -12,7 +12,6 @@
#include "xe_device.h"
#include "xe_force_wake.h"
-#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_idle.h"
@@ -36,6 +35,11 @@
#include "xe_uc_debugfs.h"
#include "xe_wa.h"
+static struct xe_gt *node_to_gt(struct drm_info_node *node)
+{
+ return node->dent->d_parent->d_inode->i_private;
+}
+
/**
* xe_gt_debugfs_simple_show - A show callback for struct drm_info_list
* @m: the &seq_file
@@ -78,8 +82,7 @@ int xe_gt_debugfs_simple_show(struct seq_file *m, void *data)
{
struct drm_printer p = drm_seq_file_printer(m);
struct drm_info_node *node = m->private;
- struct dentry *parent = node->dent->d_parent;
- struct xe_gt *gt = parent->d_inode->i_private;
+ struct xe_gt *gt = node_to_gt(node);
int (*print)(struct xe_gt *, struct drm_printer *) = node->info_ent->data;
if (WARN_ON(!print))
@@ -88,15 +91,36 @@ int xe_gt_debugfs_simple_show(struct seq_file *m, void *data)
return print(gt, &p);
}
-static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
+/**
+ * xe_gt_debugfs_show_with_rpm - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * Similar to xe_gt_debugfs_simple_show() but implicitly takes a RPM ref.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data)
{
+ struct drm_info_node *node = m->private;
+ struct xe_gt *gt = node_to_gt(node);
struct xe_device *xe = gt_to_xe(gt);
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = xe_gt_debugfs_simple_show(m, data);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
+{
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
unsigned int fw_ref;
int ret = 0;
- xe_pm_runtime_get(xe);
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
ret = -ETIMEDOUT;
@@ -108,58 +132,27 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
fw_put:
xe_force_wake_put(gt_to_fw(gt), fw_ref);
- xe_pm_runtime_put(xe);
-
- return ret;
-}
-
-static int powergate_info(struct xe_gt *gt, struct drm_printer *p)
-{
- int ret;
-
- xe_pm_runtime_get(gt_to_xe(gt));
- ret = xe_gt_idle_pg_print(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
return ret;
}
static int topology(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_gt_topology_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int steering(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_gt_mcr_steering_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
-static int ggtt(struct xe_gt *gt, struct drm_printer *p)
-{
- int ret;
-
- xe_pm_runtime_get(gt_to_xe(gt));
- ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return ret;
-}
-
static int register_save_restore(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
- xe_pm_runtime_get(gt_to_xe(gt));
-
xe_reg_sr_dump(&gt->reg_sr, p);
drm_printf(p, "\n");
@@ -177,98 +170,66 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p)
for_each_hw_engine(hwe, gt, id)
xe_reg_whitelist_dump(&hwe->reg_whitelist, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int workarounds(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_wa_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int tunings(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_tuning_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int pat(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_pat_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int mocs(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_mocs_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int ccs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COMPUTE);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int bcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COPY);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int vcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_DECODE);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int hwconfig(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_guc_hwconfig_dump(&gt->uc.guc, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
@@ -278,26 +239,26 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p)
* - without access to the PF specific data
*/
static const struct drm_info_list vf_safe_debugfs_list[] = {
- {"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
- {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
- {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
- {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds},
- {"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings},
- {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc},
- {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc},
- {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc},
- {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc},
- {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc},
- {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig},
+ { "topology", .show = xe_gt_debugfs_show_with_rpm, .data = topology },
+ { "register-save-restore",
+ .show = xe_gt_debugfs_show_with_rpm, .data = register_save_restore },
+ { "workarounds", .show = xe_gt_debugfs_show_with_rpm, .data = workarounds },
+ { "tunings", .show = xe_gt_debugfs_show_with_rpm, .data = tunings },
+ { "default_lrc_rcs", .show = xe_gt_debugfs_show_with_rpm, .data = rcs_default_lrc },
+ { "default_lrc_ccs", .show = xe_gt_debugfs_show_with_rpm, .data = ccs_default_lrc },
+ { "default_lrc_bcs", .show = xe_gt_debugfs_show_with_rpm, .data = bcs_default_lrc },
+ { "default_lrc_vcs", .show = xe_gt_debugfs_show_with_rpm, .data = vcs_default_lrc },
+ { "default_lrc_vecs", .show = xe_gt_debugfs_show_with_rpm, .data = vecs_default_lrc },
+ { "hwconfig", .show = xe_gt_debugfs_show_with_rpm, .data = hwconfig },
};
/* everything else should be added here */
static const struct drm_info_list pf_only_debugfs_list[] = {
- {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
- {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs},
- {"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
- {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info},
- {"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
+ { "hw_engines", .show = xe_gt_debugfs_show_with_rpm, .data = hw_engines },
+ { "mocs", .show = xe_gt_debugfs_show_with_rpm, .data = mocs },
+ { "pat", .show = xe_gt_debugfs_show_with_rpm, .data = pat },
+ { "powergate_info", .show = xe_gt_debugfs_show_with_rpm, .data = xe_gt_idle_pg_print },
+ { "steering", .show = xe_gt_debugfs_show_with_rpm, .data = steering },
};
static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t *ppos,
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h
index 05a6cc93c78c..32ee3264051b 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h
@@ -11,5 +11,6 @@ struct xe_gt;
void xe_gt_debugfs_register(struct xe_gt *gt);
int xe_gt_debugfs_simple_show(struct seq_file *m, void *data);
+int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data);
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 00789844ea4d..d5adbbb013ec 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -5,6 +5,7 @@
#include "xe_guc.h"
+#include <linux/iopoll.h>
#include <drm/drm_managed.h>
#include <generated/xe_wa_oob.h>
@@ -971,20 +972,93 @@ static int guc_xfer_rsa(struct xe_guc *guc)
}
/*
- * Check a previously read GuC status register (GUC_STATUS) looking for
- * known terminal states (either completion or failure) of either the
- * microkernel status field or the boot ROM status field. Returns +1 for
- * successful completion, -1 for failure and 0 for any intermediate state.
+ * Wait for the GuC to start up.
+ *
+ * Measurements indicate this should take no more than 20ms (assuming the GT
+ * clock is at maximum frequency). However, thermal throttling and other issues
+ * can prevent the clock hitting max and thus making the load take significantly
+ * longer. Allow up to 3s as a safety margin in normal builds. For
+ * CONFIG_DRM_XE_DEBUG allow up to 20s to account for slower execution, issues
+ * in PCODE, driver, fan, etc.
+ *
+ * Keep checking the GUC_STATUS every 10ms with a debug message every 100
+ * attempts as a "I'm slow, but alive" message. Regardless, if it takes more
+ * than 200ms, emit a warning.
*/
-static int guc_load_done(u32 status)
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define GUC_LOAD_TIMEOUT_SEC 20
+#else
+#define GUC_LOAD_TIMEOUT_SEC 3
+#endif
+#define GUC_LOAD_TIME_WARN_MSEC 200
+
+static void print_load_status_err(struct xe_gt *gt, u32 status)
{
- u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, status);
- u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, status);
+ struct xe_mmio *mmio = &gt->mmio;
+ u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status);
+ u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
+
+ xe_gt_err(gt, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ bootrom, ukernel,
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
- switch (uk_val) {
+ switch (bootrom) {
+ case XE_BOOTROM_STATUS_NO_KEY_FOUND:
+ xe_gt_err(gt, "invalid key requested, header = 0x%08X\n",
+ xe_mmio_read32(mmio, GUC_HEADER_INFO));
+ break;
+ case XE_BOOTROM_STATUS_RSA_FAILED:
+ xe_gt_err(gt, "firmware signature verification failed\n");
+ break;
+ case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE:
+ xe_gt_err(gt, "firmware production part check failure\n");
+ break;
+ }
+
+ switch (ukernel) {
+ case XE_GUC_LOAD_STATUS_HWCONFIG_START:
+ xe_gt_err(gt, "still extracting hwconfig table.\n");
+ break;
+ case XE_GUC_LOAD_STATUS_EXCEPTION:
+ xe_gt_err(gt, "firmware exception. EIP: %#x\n",
+ xe_mmio_read32(mmio, SOFT_SCRATCH(13)));
+ break;
+ case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID:
+ xe_gt_err(gt, "illegal init/ADS data\n");
+ break;
+ case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ xe_gt_err(gt, "illegal register in save/restore workaround list\n");
+ break;
+ case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
+ xe_gt_err(gt, "illegal workaround KLV data\n");
+ break;
+ case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG:
+ xe_gt_err(gt, "illegal feature flag specified\n");
+ break;
+ }
+}
+
+/*
+ * Check GUC_STATUS looking for known terminal states (either completion or
+ * failure) of either the microkernel status field or the boot ROM status field.
+ *
+ * Returns 1 for successful completion, -1 for failure and 0 for any
+ * intermediate state.
+ */
+static int guc_load_done(struct xe_gt *gt, u32 *status, u32 *tries)
+{
+ u32 ukernel, bootrom;
+
+ *status = xe_mmio_read32(&gt->mmio, GUC_STATUS);
+ ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, *status);
+ bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, *status);
+
+ switch (ukernel) {
case XE_GUC_LOAD_STATUS_READY:
return 1;
-
case XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH:
case XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH:
case XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE:
@@ -1000,7 +1074,7 @@ static int guc_load_done(u32 status)
return -1;
}
- switch (br_val) {
+ switch (bootrom) {
case XE_BOOTROM_STATUS_NO_KEY_FOUND:
case XE_BOOTROM_STATUS_RSA_FAILED:
case XE_BOOTROM_STATUS_PAVPC_FAILED:
@@ -1014,165 +1088,58 @@ static int guc_load_done(u32 status)
return -1;
}
- return 0;
-}
+ if (++*tries >= 100) {
+ struct xe_guc_pc *guc_pc = &gt->uc.guc.pc;
-static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
-{
- u32 freq;
- int ret = xe_guc_pc_get_cur_freq(guc_pc, &freq);
+ *tries = 0;
+ xe_gt_dbg(gt, "GuC load still in progress, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n",
+ xe_guc_pc_get_act_freq(guc_pc),
+ xe_guc_pc_get_cur_freq_fw(guc_pc),
+ *status, ukernel, bootrom);
+ }
- return ret ? ret : freq;
+ return 0;
}
-/*
- * Wait for the GuC to start up.
- *
- * Measurements indicate this should take no more than 20ms (assuming the GT
- * clock is at maximum frequency). However, thermal throttling and other issues
- * can prevent the clock hitting max and thus making the load take significantly
- * longer. Allow up to 200ms as a safety margin for real world worst case situations.
- *
- * However, bugs anywhere from KMD to GuC to PCODE to fan failure in a CI farm can
- * lead to even longer times. E.g. if the GT is clamped to minimum frequency then
- * the load times can be in the seconds range. So the timeout is increased for debug
- * builds to ensure that problems can be correctly analysed. For release builds, the
- * timeout is kept short so that users don't wait forever to find out that there is a
- * problem. In either case, if the load took longer than is reasonable even with some
- * 'sensible' throttling, then flag a warning because something is not right.
- *
- * Note that there is a limit on how long an individual usleep_range() can wait for,
- * hence longer waits require wrapping a shorter wait in a loop.
- *
- * Note that the only reason an end user should hit the shorter timeout is in case of
- * extreme thermal throttling. And a system that is that hot during boot is probably
- * dead anyway!
- */
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
-#define GUC_LOAD_RETRY_LIMIT 20
-#else
-#define GUC_LOAD_RETRY_LIMIT 3
-#endif
-#define GUC_LOAD_TIME_WARN_MS 200
-
static int guc_wait_ucode(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
- struct xe_mmio *mmio = &gt->mmio;
struct xe_guc_pc *guc_pc = &gt->uc.guc.pc;
- ktime_t before, after, delta;
- int load_done;
- u32 status = 0;
- int count = 0;
+ u32 before_freq, act_freq, cur_freq;
+ u32 status = 0, tries = 0;
+ ktime_t before;
u64 delta_ms;
- u32 before_freq;
+ int ret;
before_freq = xe_guc_pc_get_act_freq(guc_pc);
before = ktime_get();
- /*
- * Note, can't use any kind of timing information from the call to xe_mmio_wait.
- * It could return a thousand intermediate stages at random times. Instead, must
- * manually track the total time taken and locally implement the timeout.
- */
- do {
- u32 last_status = status & (GS_UKERNEL_MASK | GS_BOOTROM_MASK);
- int ret;
-
- /*
- * Wait for any change (intermediate or terminal) in the status register.
- * Note, the return value is a don't care. The only failure code is timeout
- * but the timeouts need to be accumulated over all the intermediate partial
- * timeouts rather than allowing a huge timeout each time. So basically, need
- * to treat a timeout no different to a value change.
- */
- ret = xe_mmio_wait32_not(mmio, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
- last_status, 1000 * 1000, &status, false);
- if (ret < 0)
- count++;
- after = ktime_get();
- delta = ktime_sub(after, before);
- delta_ms = ktime_to_ms(delta);
-
- load_done = guc_load_done(status);
- if (load_done != 0)
- break;
- if (delta_ms >= (GUC_LOAD_RETRY_LIMIT * 1000))
- break;
+ ret = poll_timeout_us(ret = guc_load_done(gt, &status, &tries), ret,
+ 10 * USEC_PER_MSEC,
+ GUC_LOAD_TIMEOUT_SEC * USEC_PER_SEC, false);
- xe_gt_dbg(gt, "load still in progress, timeouts = %d, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n",
- count, xe_guc_pc_get_act_freq(guc_pc),
- guc_pc_get_cur_freq(guc_pc), status,
- REG_FIELD_GET(GS_BOOTROM_MASK, status),
- REG_FIELD_GET(GS_UKERNEL_MASK, status));
- } while (1);
+ delta_ms = ktime_to_ms(ktime_sub(ktime_get(), before));
+ act_freq = xe_guc_pc_get_act_freq(guc_pc);
+ cur_freq = xe_guc_pc_get_cur_freq_fw(guc_pc);
- if (load_done != 1) {
- u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status);
- u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
-
- xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz), done = %d\n",
+ if (ret) {
+ xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz)\n",
status, delta_ms, xe_guc_pc_get_act_freq(guc_pc),
- guc_pc_get_cur_freq(guc_pc), load_done);
- xe_gt_err(gt, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
- REG_FIELD_GET(GS_MIA_IN_RESET, status),
- bootrom, ukernel,
- REG_FIELD_GET(GS_MIA_MASK, status),
- REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
-
- switch (bootrom) {
- case XE_BOOTROM_STATUS_NO_KEY_FOUND:
- xe_gt_err(gt, "invalid key requested, header = 0x%08X\n",
- xe_mmio_read32(mmio, GUC_HEADER_INFO));
- break;
-
- case XE_BOOTROM_STATUS_RSA_FAILED:
- xe_gt_err(gt, "firmware signature verification failed\n");
- break;
-
- case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE:
- xe_gt_err(gt, "firmware production part check failure\n");
- break;
- }
-
- switch (ukernel) {
- case XE_GUC_LOAD_STATUS_HWCONFIG_START:
- xe_gt_err(gt, "still extracting hwconfig table.\n");
- break;
-
- case XE_GUC_LOAD_STATUS_EXCEPTION:
- xe_gt_err(gt, "firmware exception. EIP: %#x\n",
- xe_mmio_read32(mmio, SOFT_SCRATCH(13)));
- break;
-
- case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID:
- xe_gt_err(gt, "illegal init/ADS data\n");
- break;
-
- case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
- xe_gt_err(gt, "illegal register in save/restore workaround list\n");
- break;
-
- case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
- xe_gt_err(gt, "illegal workaround KLV data\n");
- break;
-
- case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG:
- xe_gt_err(gt, "illegal feature flag specified\n");
- break;
- }
+ xe_guc_pc_get_cur_freq_fw(guc_pc));
+ print_load_status_err(gt, status);
return -EPROTO;
- } else if (delta_ms > GUC_LOAD_TIME_WARN_MS) {
- xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n",
- delta_ms, status, count);
- xe_gt_warn(gt, "excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n",
- xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc),
- before_freq, xe_gt_throttle_get_limit_reasons(gt));
+ }
+
+ if (delta_ms > GUC_LOAD_TIME_WARN_MSEC) {
+ xe_gt_warn(gt, "GuC load: excessive init time: %lldms! [status = 0x%08X]\n",
+ delta_ms, status);
+ xe_gt_warn(gt, "GuC load: excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n",
+ act_freq, cur_freq, before_freq,
+ xe_gt_throttle_get_limit_reasons(gt));
} else {
- xe_gt_dbg(gt, "init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X, timeouts = %d\n",
- delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc),
- before_freq, status, count);
+ xe_gt_dbg(gt, "GuC load: init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X\n",
+ delta_ms, act_freq, cur_freq, before_freq, status);
}
return 0;
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 53fdf59524c4..3c0feb50a1e2 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -7,6 +7,7 @@
#include <linux/cleanup.h>
#include <linux/delay.h>
+#include <linux/iopoll.h>
#include <linux/jiffies.h>
#include <linux/ktime.h>
#include <linux/wait_bit.h>
@@ -130,26 +131,16 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc)
FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
static int wait_for_pc_state(struct xe_guc_pc *pc,
- enum slpc_global_state state,
+ enum slpc_global_state target_state,
int timeout_ms)
{
- int timeout_us = 1000 * timeout_ms;
- int slept, wait = 10;
+ enum slpc_global_state state;
xe_device_assert_mem_access(pc_to_xe(pc));
- for (slept = 0; slept < timeout_us;) {
- if (slpc_shared_data_read(pc, header.global_state) == state)
- return 0;
-
- usleep_range(wait, wait << 1);
- slept += wait;
- wait <<= 1;
- if (slept + wait > timeout_us)
- wait = timeout_us - slept;
- }
-
- return -ETIMEDOUT;
+ return poll_timeout_us(state = slpc_shared_data_read(pc, header.global_state),
+ state == target_state,
+ 20, timeout_ms * USEC_PER_MSEC, false);
}
static int wait_for_flush_complete(struct xe_guc_pc *pc)
@@ -164,24 +155,15 @@ static int wait_for_flush_complete(struct xe_guc_pc *pc)
return 0;
}
-static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
+static int wait_for_act_freq_max_limit(struct xe_guc_pc *pc, u32 max_limit)
{
- int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
- int slept, wait = 10;
-
- for (slept = 0; slept < timeout_us;) {
- if (xe_guc_pc_get_act_freq(pc) <= freq)
- return 0;
-
- usleep_range(wait, wait << 1);
- slept += wait;
- wait <<= 1;
- if (slept + wait > timeout_us)
- wait = timeout_us - slept;
- }
+ u32 freq;
- return -ETIMEDOUT;
+ return poll_timeout_us(freq = xe_guc_pc_get_act_freq(pc),
+ freq <= max_limit,
+ 20, SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC, false);
}
+
static int pc_action_reset(struct xe_guc_pc *pc)
{
struct xe_guc_ct *ct = pc_to_ct(pc);
@@ -983,7 +965,7 @@ void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
* Wait for actual freq to go below the flush cap: even if the previous
* max was below cap, the current one might still be above it
*/
- ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ ret = wait_for_act_freq_max_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
if (ret)
xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index d6625c71115b..96afa49f0b4b 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -83,8 +83,58 @@ static struct lockdep_map xe_pm_runtime_d3cold_map = {
static struct lockdep_map xe_pm_runtime_nod3cold_map = {
.name = "xe_rpm_nod3cold_map"
};
+
+static struct lockdep_map xe_pm_block_lockdep_map = {
+ .name = "xe_pm_block_map",
+};
#endif
+static void xe_pm_block_begin_signalling(void)
+{
+ lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_);
+}
+
+static void xe_pm_block_end_signalling(void)
+{
+ lock_release(&xe_pm_block_lockdep_map, _RET_IP_);
+}
+
+/**
+ * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
+ *
+ * Annotation to use where the code might block or cease to make
+ * progress pending resume completion.
+ */
+void xe_pm_might_block_on_suspend(void)
+{
+ lock_map_acquire(&xe_pm_block_lockdep_map);
+ lock_map_release(&xe_pm_block_lockdep_map);
+}
+
+/**
+ * xe_pm_block_on_suspend() - Block pending suspend.
+ * @xe: The xe device about to be suspended.
+ *
+ * Block if the pm notifier has started evicting bos, to avoid
+ * racing and validating those bos back. The function is
+ * annotated to ensure no locks are held that are also grabbed
+ * in the pm notifier or the device suspend / resume.
+ * This is intended to be used by freezable tasks only
+ * (not freezable workqueues), with the intention that the function
+ * returns %-ERESTARTSYS when tasks are frozen during suspend,
+ * and allows the task to freeze. The caller must be able to
+ * handle the %-ERESTARTSYS.
+ *
+ * Return: %0 on success, %-ERESTARTSYS on signal pending or
+ * if freezing requested.
+ */
+int xe_pm_block_on_suspend(struct xe_device *xe)
+{
+ xe_pm_might_block_on_suspend();
+
+ return wait_for_completion_interruptible(&xe->pm_block);
+}
+
/**
* xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
* @xe: The xe device.
@@ -124,6 +174,7 @@ int xe_pm_suspend(struct xe_device *xe)
int err;
drm_dbg(&xe->drm, "Suspending device\n");
+ xe_pm_block_begin_signalling();
trace_xe_pm_suspend(xe, __builtin_return_address(0));
err = xe_pxp_pm_suspend(xe->pxp);
@@ -155,6 +206,8 @@ int xe_pm_suspend(struct xe_device *xe)
xe_i2c_pm_suspend(xe);
drm_dbg(&xe->drm, "Device suspended\n");
+ xe_pm_block_end_signalling();
+
return 0;
err_display:
@@ -162,6 +215,7 @@ err_display:
xe_pxp_pm_resume(xe->pxp);
err:
drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
+ xe_pm_block_end_signalling();
return err;
}
@@ -178,6 +232,7 @@ int xe_pm_resume(struct xe_device *xe)
u8 id;
int err;
+ xe_pm_block_begin_signalling();
drm_dbg(&xe->drm, "Resuming device\n");
trace_xe_pm_resume(xe, __builtin_return_address(0));
@@ -222,9 +277,11 @@ int xe_pm_resume(struct xe_device *xe)
xe_late_bind_fw_load(&xe->late_bind);
drm_dbg(&xe->drm, "Device resumed\n");
+ xe_pm_block_end_signalling();
return 0;
err:
drm_dbg(&xe->drm, "Device resume failed %d\n", err);
+ xe_pm_block_end_signalling();
return err;
}
@@ -329,9 +386,16 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
switch (action) {
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
+ {
+ struct xe_validation_ctx ctx;
+
reinit_completion(&xe->pm_block);
+ xe_pm_block_begin_signalling();
xe_pm_runtime_get(xe);
+ (void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
+ (struct xe_val_flags) {.exclusive = true});
err = xe_bo_evict_all_user(xe);
+ xe_validation_ctx_fini(&ctx);
if (err)
drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);
@@ -343,7 +407,9 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
* avoid a runtime suspend interfering with evicted objects or backup
* allocations.
*/
+ xe_pm_block_end_signalling();
break;
+ }
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
complete_all(&xe->pm_block);
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 59678b310e55..f7f89a18b6fc 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -33,6 +33,8 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
bool xe_rpm_reclaim_safe(const struct xe_device *xe);
struct task_struct *xe_pm_read_callback_task(struct xe_device *xe);
+int xe_pm_block_on_suspend(struct xe_device *xe);
+void xe_pm_might_block_on_suspend(void);
int xe_pm_module_init(void);
#endif
diff --git a/drivers/gpu/drm/xe/xe_psmi.c b/drivers/gpu/drm/xe/xe_psmi.c
index 45d142191d60..6a54e38b81ba 100644
--- a/drivers/gpu/drm/xe/xe_psmi.c
+++ b/drivers/gpu/drm/xe/xe_psmi.c
@@ -70,8 +70,8 @@ static struct xe_bo *psmi_alloc_object(struct xe_device *xe,
{
struct xe_tile *tile;
- if (!id || !bo_size)
- return NULL;
+ xe_assert(xe, id);
+ xe_assert(xe, bo_size);
tile = &xe->tiles[id - 1];
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index e1b603aba61b..2e9ff33ed2fe 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -276,8 +276,7 @@ static int query_mem_regions(struct xe_device *xe,
mem_regions->mem_regions[0].instance = 0;
mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
- if (perfmon_capable())
- mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
+ mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
mem_regions->num_mem_regions = 1;
for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
@@ -293,13 +292,11 @@ static int query_mem_regions(struct xe_device *xe,
mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
man->size;
- if (perfmon_capable()) {
- xe_ttm_vram_get_used(man,
- &mem_regions->mem_regions
- [mem_regions->num_mem_regions].used,
- &mem_regions->mem_regions
- [mem_regions->num_mem_regions].cpu_visible_used);
- }
+ xe_ttm_vram_get_used(man,
+ &mem_regions->mem_regions
+ [mem_regions->num_mem_regions].used,
+ &mem_regions->mem_regions
+ [mem_regions->num_mem_regions].cpu_visible_used);
mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
xe_ttm_vram_get_cpu_visible_size(man);
diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c
index 5523874cba7b..a3f437d38f86 100644
--- a/drivers/gpu/drm/xe/xe_tile_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c
@@ -6,6 +6,7 @@
#include <linux/debugfs.h>
#include <drm/drm_debugfs.h>
+#include "xe_ggtt.h"
#include "xe_pm.h"
#include "xe_sa.h"
#include "xe_tile_debugfs.h"
@@ -90,6 +91,11 @@ static int tile_debugfs_show_with_rpm(struct seq_file *m, void *data)
return ret;
}
+static int ggtt(struct xe_tile *tile, struct drm_printer *p)
+{
+ return xe_ggtt_dump(tile->mem.ggtt, p);
+}
+
static int sa_info(struct xe_tile *tile, struct drm_printer *p)
{
drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p,
@@ -100,6 +106,7 @@ static int sa_info(struct xe_tile *tile, struct drm_printer *p)
/* only for debugfs files which can be safely used on the VF */
static const struct drm_info_list vf_safe_debugfs_list[] = {
+ { "ggtt", .show = tile_debugfs_show_with_rpm, .data = ggtt },
{ "sa_info", .show = tile_debugfs_show_with_rpm, .data = sa_info },
};
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 0cacab20ff85..80b7f13ecd80 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -466,6 +466,8 @@ static void preempt_rebind_work_func(struct work_struct *w)
retry:
if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
up_write(&vm->lock);
+ /* We don't actually block but don't make progress. */
+ xe_pm_might_block_on_suspend();
return;
}