summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2022-05-06 16:16:13 +1000
committerDave Airlie <airlied@redhat.com>2022-05-06 16:16:14 +1000
commitaf3847a7472d2def8358b7ae94b14f1d20fd8661 (patch)
tree7f091b64716f8ba49e46df46af3303471f162e36
parent97ab530870cc23bf20952ce8a1d86196dddc2e6e (diff)
parent1df1c79cbb7ac9bf148930be3418973c76ba8dde (diff)
Merge tag 'drm-intel-gt-next-2022-05-05' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes: - Add kerneldoc for engine class enum (Matt Roper) - Add compute engine ABI (Matt Roper) Driver Changes: - Define GuC firmware version for DG2 (John Harrison) - Clear SET_PREDICATE_RESULT prior to executing the ring (Chris Wilson) - Fix race in __i915_vma_remove_closed (Karol Herbst) - Add register for compute engine's MMIO-based TLB invalidation (Matt Roper) - Xe_HP SDV and DG2 have up to 4 CCS engines (Daniele Ceraolo Spurio) - Add initial Ponte Vecchio definitions (Stuart Summers) - Document the eviction of the Flat-CCS objects (Ramalingam C) - Use existing uncore helper to read gpm_timestamp (Umesh Nerlige Ramappa) - Fix issue with LRI relative addressing (Akeem G Abodunrin) - Skip poisoning SET_PREDICATE_RESULT on dg2 (Chris Wilson) - Optimize the ccs_sz calculation per chunk (Ramalingam C) - Remove superfluous string helper include (Jani Nikula) - Fix assert in i915_ggtt_pin (Tvrtko Ursulin) - Use IOMEM_ERR_PTR() directly (Kefeng Wang) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/YnNxCm1pyflu3taj@tursulin-mobl2
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c54
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_regs.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c75
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c59
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c53
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c19
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c1
-rw-r--r--drivers/gpu/drm/i915/i915_drm_client.c1
-rw-r--r--drivers/gpu/drm/i915/i915_drm_client.h2
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h2
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c27
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c19
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h1
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.c1
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.h1
-rw-r--r--include/uapi/drm/i915_drm.h62
24 files changed, 319 insertions, 95 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 9529c5455bc3..3e13960615bd 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -5,6 +5,7 @@
#include "gen8_engine_cs.h"
#include "i915_drv.h"
+#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_lrc.h"
#include "intel_ring.h"
@@ -385,6 +386,59 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq)
return 0;
}
+/*
+ * Emit a batch buffer start for @rq, bracketed by the predicate fixups.
+ *
+ * Reserves 12 dwords in the ring and writes, in order:
+ *  - MI_ARB_ON_OFF with the caller supplied @arb setting,
+ *  - a MI_LOAD_REGISTER_MEM_GEN8 that loads RING_PREDICATE_RESULT(0) from
+ *    lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA,
+ *  - the MI_BATCH_BUFFER_START_GEN8 for the batch at @offset,
+ *  - a second MI_BATCH_BUFFER_START_GEN8 targeting
+ *    lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_BB,
+ *  - MI_ARB_ON_OFF | MI_ARB_DISABLE.
+ *
+ * @len is not used by this function.
+ *
+ * Returns 0 on success, or the error encoded in the pointer returned by
+ * intel_ring_begin().
+ */
+static int __gen125_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags,
+ u32 arb)
+{
+ struct intel_context *ce = rq->context;
+ u32 wa_offset = lrc_indirect_bb(ce);
+ u32 *cs;
+
+ /* 1 + 4 + 3 + 3 + 1 dwords are written below */
+ cs = intel_ring_begin(rq, 12);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_ARB_ON_OFF | arb;
+
+ /* Load the predicate result register from the value kept in the wa page */
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0));
+ *cs++ = wa_offset + DG2_PREDICATE_RESULT_WA;
+ *cs++ = 0;
+
+ /* BIT(8) is set for every dispatch that is not I915_DISPATCH_SECURE */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+
+ /* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8;
+ *cs++ = wa_offset + DG2_PREDICATE_RESULT_BB;
+ *cs++ = 0;
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+/*
+ * Batch buffer start that passes MI_ARB_DISABLE as the arbitration setting
+ * emitted ahead of the batch.
+ */
+int gen125_emit_bb_start_noarb(struct i915_request *rq,
+			       u64 offset, u32 len,
+			       const unsigned int flags)
+{
+	const u32 arb = MI_ARB_DISABLE;
+
+	return __gen125_emit_bb_start(rq, offset, len, flags, arb);
+}
+
+/*
+ * Batch buffer start that passes MI_ARB_ENABLE as the arbitration setting
+ * emitted ahead of the batch.
+ */
+int gen125_emit_bb_start(struct i915_request *rq,
+			 u64 offset, u32 len,
+			 const unsigned int flags)
+{
+	const u32 arb = MI_ARB_ENABLE;
+
+	return __gen125_emit_bb_start(rq, offset, len, flags, arb);
+}
+
int gen8_emit_bb_start_noarb(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
index 107ab42539ab..32e3d2b831bb 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -31,6 +31,13 @@ int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
+int gen125_emit_bb_start_noarb(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+int gen125_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+
u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index 594a629cb28f..75a0c55c5aa5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -148,6 +148,7 @@
(REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \
REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
+#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) /* gen12+ */
#define MI_PREDICATE_RESULT_2(base) _MMIO((base) + 0x3bc)
#define LOWER_SLICE_ENABLED (1 << 0)
#define LOWER_SLICE_DISABLED (0 << 0)
@@ -193,6 +194,7 @@
#define RING_TIMESTAMP_UDW(base) _MMIO((base) + 0x358 + 4)
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
#define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */
+#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) /* NOTE(review): identical to the definition added beside MI_PREDICATE_RESULT_2 in the previous hunk of this header; one of the two is redundant */
#define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4)
#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2)
#define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 0f6cd96b459f..46a174f8aa00 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
- /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
+ [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
};
static int engine_cmp(void *priv, const struct list_head *A,
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index f8749c433b7c..86f7a9ac1c39 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3433,10 +3433,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
}
}
- if (intel_engine_has_preemption(engine))
- engine->emit_bb_start = gen8_emit_bb_start;
- else
- engine->emit_bb_start = gen8_emit_bb_start_noarb;
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
+ if (intel_engine_has_preemption(engine))
+ engine->emit_bb_start = gen125_emit_bb_start;
+ else
+ engine->emit_bb_start = gen125_emit_bb_start_noarb;
+ } else {
+ if (intel_engine_has_preemption(engine))
+ engine->emit_bb_start = gen8_emit_bb_start;
+ else
+ engine->emit_bb_start = gen8_emit_bb_start_noarb;
+ }
engine->busyness = execlists_engine_busyness;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index e52718a87f14..556bca3be804 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -39,6 +39,8 @@
#define MI_GLOBAL_GTT (1<<22)
#define MI_NOOP MI_INSTR(0, 0)
+#define MI_SET_PREDICATE MI_INSTR(0x01, 0)
+#define MI_SET_PREDICATE_DISABLE (0 << 0)
#define MI_USER_INTERRUPT MI_INSTR(0x02, 0)
#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0)
#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 92394f13b42f..53307ca0eed0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1175,6 +1175,7 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
[VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
[VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
[COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
+ [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
};
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index a39718a40cc3..a0a49c16babd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1007,6 +1007,7 @@
#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
+#define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04)
#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28)
#define RENDER_MOD_CTRL _MMIO(0xcf2c)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3f83a9038e13..eec73c66406c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -904,6 +904,24 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
engine->name);
}
+/* Byte offset of the context's wa_bb page: wa_bb_page counted in pages. */
+static u32 context_wa_bb_offset(const struct intel_context *ce)
+{
+	return ce->wa_bb_page * PAGE_SIZE;
+}
+
+/*
+ * CPU pointer to the wa_bb page of @ce, derived from the mapped register
+ * state. The context must have a wa_bb page (wa_bb_page != 0).
+ */
+static u32 *context_indirect_bb(const struct intel_context *ce)
+{
+	void *image;
+
+	GEM_BUG_ON(!ce->wa_bb_page);
+
+	/* lrc_reg_state sits LRC_STATE_OFFSET bytes into the context image */
+	image = ce->lrc_reg_state;
+	image -= LRC_STATE_OFFSET;
+
+	return image + context_wa_bb_offset(ce);
+}
+
void lrc_init_state(struct intel_context *ce,
struct intel_engine_cs *engine,
void *state)
@@ -922,6 +940,10 @@ void lrc_init_state(struct intel_context *ce,
/* Clear the ppHWSP (inc. per-context counters) */
memset(state, 0, PAGE_SIZE);
+ /* Clear the indirect wa and storage */
+ if (ce->wa_bb_page)
+ memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);
+
/*
* The second page of the context object contains some registers which
* must be set up prior to the first execution.
@@ -929,6 +951,35 @@ void lrc_init_state(struct intel_context *ce,
__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}
+/* GGTT offset of the wa_bb page inside the context state object of @ce. */
+u32 lrc_indirect_bb(const struct intel_context *ce)
+{
+	const u32 state_base = i915_ggtt_offset(ce->state);
+
+	return state_base + context_wa_bb_offset(ce);
+}
+
+/*
+ * Write the predicate-disable batch at @cs.
+ *
+ * The sequence stores 0 to lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA,
+ * emits a predicated MI_BATCH_BUFFER_END, then MI_SET_PREDICATE with
+ * MI_SET_PREDICATE_DISABLE, stores 1 to the same location and finishes with
+ * an unconditional MI_BATCH_BUFFER_END.
+ *
+ * Returns the pointer just past the last dword written. The GEM_BUG_ON
+ * checks that the commands do not run past the DG2_PREDICATE_RESULT_WA
+ * offset within the page.
+ */
+static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
+{
+ /* If predication is active, this will be noop'ed */
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
+ *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
+ *cs++ = 0;
+ *cs++ = 0; /* No predication */
+
+ /* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
+ *cs++ = MI_BATCH_BUFFER_END | BIT(15);
+ *cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;
+
+ /* Instructions are no longer predicated (disabled), we can proceed */
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
+ *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
+ *cs++ = 0;
+ *cs++ = 1; /* enable predication before the next BB */
+
+ *cs++ = MI_BATCH_BUFFER_END;
+ /* The batch must not overlap the storage slot at the end of the page */
+ GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);
+
+ return cs;
+}
+
static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
@@ -1240,24 +1291,6 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
return cs;
}
-static u32 context_wa_bb_offset(const struct intel_context *ce)
-{
- return PAGE_SIZE * ce->wa_bb_page;
-}
-
-static u32 *context_indirect_bb(const struct intel_context *ce)
-{
- void *ptr;
-
- GEM_BUG_ON(!ce->wa_bb_page);
-
- ptr = ce->lrc_reg_state;
- ptr -= LRC_STATE_OFFSET; /* back to start of context image */
- ptr += context_wa_bb_offset(ce);
-
- return ptr;
-}
-
static void
setup_indirect_ctx_bb(const struct intel_context *ce,
const struct intel_engine_cs *engine,
@@ -1271,9 +1304,11 @@ setup_indirect_ctx_bb(const struct intel_context *ce,
while ((unsigned long)cs % CACHELINE_BYTES)
*cs++ = MI_NOOP;
+ GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
+ setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));
+
lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
- i915_ggtt_offset(ce->state) +
- context_wa_bb_offset(ce),
+ lrc_indirect_bb(ce),
(cs - start) * sizeof(*cs));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 7371bb5c8129..31be734010db 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -145,4 +145,9 @@ static inline void lrc_runtime_stop(struct intel_context *ce)
WRITE_ONCE(stats->active, 0);
}
+/*
+ * Byte offsets that callers add to lrc_indirect_bb():
+ * DG2_PREDICATE_RESULT_WA is the last u64-sized slot of the page, used as
+ * storage for the value loaded into RING_PREDICATE_RESULT;
+ * DG2_PREDICATE_RESULT_BB is where the predicate-disable batch is placed.
+ */
+#define DG2_PREDICATE_RESULT_WA (PAGE_SIZE - sizeof(u64))
+#define DG2_PREDICATE_RESULT_BB (2048)
+
+/* GGTT offset of the wa_bb page of @ce */
+u32 lrc_indirect_bb(const struct intel_context *ce);
+
#endif /* __INTEL_LRC_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 9d552f30b627..2c35324b5f68 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -485,16 +485,21 @@ static bool wa_1209644611_applies(int ver, u32 size)
* And CCS data can be copied in and out of CCS region through
* XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly.
*
- * When we exhaust the lmem, if the object's placements support smem, then we can
- * directly decompress the compressed lmem object into smem and start using it
- * from smem itself.
+ * i915 supports Flat-CCS on lmem-only objects. When an object has smem in
+ * its preference list, on memory pressure, i915 needs to migrate the lmem
+ * content into smem. If the lmem object is Flat-CCS compressed by userspace,
+ * then i915 needs to decompress it. But i915 lacks the required information
+ * for such decompression. Hence i915 supports Flat-CCS only on lmem-only objects.
*
- * But when we need to swapout the compressed lmem object into a smem region
- * though objects' placement doesn't support smem, then we copy the lmem content
- * as it is into smem region along with ccs data (using XY_CTRL_SURF_COPY_BLT).
- * When the object is referred, lmem content will be swaped in along with
- * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at corresponding
- * location.
+ * When we exhaust the lmem, Flat-CCS capable objects' lmem backing memory can
+ * be temporarily evicted to smem, along with the auxiliary CCS state, where
+ * it can be potentially swapped-out at a later point, if required.
+ * If userspace later touches the evicted pages, then we always move
+ * the backing memory back to lmem, which includes restoring the saved CCS state,
+ * and potentially performing any required swap-in.
+ *
+ * For the migration of the lmem objects with smem in placement list, such as
+ * {lmem, smem}, objects are treated as non Flat-CCS capable objects.
*/
static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)
@@ -647,17 +652,9 @@ static int scatter_list_length(struct scatterlist *sg)
static void
calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
- int *src_sz, int *ccs_sz, u32 bytes_to_cpy,
- u32 ccs_bytes_to_cpy)
+ int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy)
{
if (ccs_bytes_to_cpy) {
- /*
- * We can only copy the ccs data corresponding to
- * the CHUNK_SZ of lmem which is
- * GET_CCS_BYTES(i915, CHUNK_SZ))
- */
- *ccs_sz = min_t(int, ccs_bytes_to_cpy, GET_CCS_BYTES(i915, CHUNK_SZ));
-
if (!src_is_lmem)
/*
* When CHUNK_SZ is passed all the pages upto CHUNK_SZ
@@ -707,10 +704,10 @@ intel_context_migrate_copy(struct intel_context *ce,
struct drm_i915_private *i915 = ce->engine->i915;
u32 ccs_bytes_to_cpy = 0, bytes_to_cpy;
enum i915_cache_level ccs_cache_level;
- int src_sz, dst_sz, ccs_sz;
u32 src_offset, dst_offset;
u8 src_access, dst_access;
struct i915_request *rq;
+ int src_sz, dst_sz;
bool ccs_is_src;
int err;
@@ -791,7 +788,7 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
- calculate_chunk_sz(i915, src_is_lmem, &src_sz, &ccs_sz,
+ calculate_chunk_sz(i915, src_is_lmem, &src_sz,
bytes_to_cpy, ccs_bytes_to_cpy);
len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
@@ -825,37 +822,35 @@ intel_context_migrate_copy(struct intel_context *ce,
bytes_to_cpy -= len;
if (ccs_bytes_to_cpy) {
+ int ccs_sz;
+
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
if (err)
goto out_rq;
+ ccs_sz = GET_CCS_BYTES(i915, len);
err = emit_pte(rq, &it_ccs, ccs_cache_level, false,
ccs_is_src ? src_offset : dst_offset,
ccs_sz);
+ if (err < 0)
+ goto out_rq;
+ if (err < ccs_sz) {
+ err = -EINVAL;
+ goto out_rq;
+ }
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
if (err)
goto out_rq;
- /*
- * Using max of src_sz and dst_sz, as we need to
- * pass the lmem size corresponding to the ccs
- * blocks we need to handle.
- */
- ccs_sz = max_t(int, ccs_is_src ? ccs_sz : src_sz,
- ccs_is_src ? dst_sz : ccs_sz);
-
err = emit_copy_ccs(rq, dst_offset, dst_access,
- src_offset, src_access, ccs_sz);
+ src_offset, src_access, len);
if (err)
goto out_rq;
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
if (err)
goto out_rq;
-
- /* Converting back to ccs bytes */
- ccs_sz = GET_CCS_BYTES(rq->engine->i915, ccs_sz);
ccs_bytes_to_cpy -= ccs_sz;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 9881a6790574..fdd25691beda 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -10,8 +10,6 @@
#include "intel_gt_regs.h"
#include "intel_sseu.h"
-#include "linux/string_helpers.h"
-
void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
u8 max_subslices, u8 max_eus_per_subslice)
{
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 6ba52ef1acb8..8b2c11dbe354 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -128,6 +128,27 @@ static int context_flush(struct intel_context *ce, long timeout)
return err;
}
+/*
+ * get_lri_mask - register-offset bits that are significant for an LRI
+ * @engine: engine the context image belongs to
+ * @lri: the MI_LOAD_REGISTER_IMM command dword
+ *
+ * When MI_LRI_LRM_CS_MMIO is not set in @lri every offset bit matters and
+ * ~0u is returned. When it is set, only the low bits of the offset are
+ * compared: 0xfff before graphics version 12, otherwise a per engine class
+ * mask (unknown classes are treated like render/compute).
+ *
+ * The result is a bit mask (including ~0u) that callers combine with u32
+ * values, so it is returned as u32 rather than int.
+ */
+static u32 get_lri_mask(struct intel_engine_cs *engine, u32 lri)
+{
+	if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
+		return ~0u;
+
+	if (GRAPHICS_VER(engine->i915) < 12)
+		return 0xfff;
+
+	switch (engine->class) {
+	default:
+	case RENDER_CLASS:
+	case COMPUTE_CLASS:
+		return 0x07ff;
+	case COPY_ENGINE_CLASS:
+		return 0x0fff;
+	case VIDEO_DECODE_CLASS:
+	case VIDEO_ENHANCEMENT_CLASS:
+		return 0x3fff;
+	}
+}
+
static int live_lrc_layout(void *arg)
{
struct intel_gt *gt = arg;
@@ -167,6 +188,7 @@ static int live_lrc_layout(void *arg)
dw = 0;
do {
u32 lri = READ_ONCE(hw[dw]);
+ u32 lri_mask;
if (lri == 0) {
dw++;
@@ -194,6 +216,18 @@ static int live_lrc_layout(void *arg)
break;
}
+ /*
+ * When bit 19 of the MI_LOAD_REGISTER_IMM instruction
+ * opcode is set on Gen12+ devices, HW does not
+ * care about certain register address offsets, and
+ * instead checks the following for valid address
+ * ranges on specific engines:
+ * RCS && CCS: BITS(0 - 10)
+ * BCS: BITS(0 - 11)
+ * VECS && VCS: BITS(0 - 13)
+ */
+ lri_mask = get_lri_mask(engine, lri);
+
lri &= 0x7f;
lri++;
dw++;
@@ -201,7 +235,7 @@ static int live_lrc_layout(void *arg)
while (lri) {
u32 offset = READ_ONCE(hw[dw]);
- if (offset != lrc[dw]) {
+ if ((offset ^ lrc[dw]) & lri_mask) {
pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
engine->name, dw, offset, lrc[dw]);
err = -EINVAL;
@@ -911,6 +945,19 @@ create_user_vma(struct i915_address_space *vm, unsigned long size)
return vma;
}
+/*
+ * Return the poison value to write to the register at @offset, with bit 0
+ * cleared when the target is RING_PREDICATE_RESULT(0).
+ */
+static u32 safe_poison(u32 offset, u32 poison)
+{
+	/*
+	 * Do not enable predication as it will nop all subsequent commands,
+	 * not only disabling the tests (by preventing all the other SRM) but
+	 * also preventing the arbitration events at the end of the request.
+	 */
+	if (offset != i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
+		return poison;
+
+	return poison & ~REG_BIT(0);
+}
+
static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
@@ -1120,7 +1167,9 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
*cs++ = MI_LOAD_REGISTER_IMM(len);
while (len--) {
*cs++ = hw[dw];
- *cs++ = poison;
+ *cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
+ MI_LRI_LRM_CS_MMIO),
+ poison);
dw += 2;
}
} while (dw < PAGE_SIZE / sizeof(u32) &&
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 61a6f2424e24..75291e9846c5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1200,20 +1200,6 @@ static u32 gpm_timestamp_shift(struct intel_gt *gt)
return 3 - shift;
}
-static u64 gpm_timestamp(struct intel_gt *gt)
-{
- u32 lo, hi, old_hi, loop = 0;
-
- hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
- do {
- lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
- old_hi = hi;
- hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
- } while (old_hi != hi && loop++ < 2);
-
- return ((u64)hi << 32) | lo;
-}
-
static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -1223,7 +1209,8 @@ static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
lockdep_assert_held(&guc->timestamp.lock);
gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
- gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
+ gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
+ MISC_STATUS1) >> guc->timestamp.shift;
gt_stamp_lo = lower_32_bits(gpm_ts);
*now = ktime_get();
@@ -3910,6 +3897,8 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
*/
engine->emit_bb_start = gen8_emit_bb_start;
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ engine->emit_bb_start = gen125_emit_bb_start;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index a876d39e6bcf..d078f884b5e3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* firmware as TGL.
*/
#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
+ fw_def(DG2, 0, guc_def(dg2, 70, 1, 2)) \
fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \
fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \
fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 475a6f824cad..18d38cb59923 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -81,6 +81,7 @@ static const char * const uabi_class_names[] = {
[I915_ENGINE_CLASS_COPY] = "copy",
[I915_ENGINE_CLASS_VIDEO] = "video",
[I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance",
+ [I915_ENGINE_CLASS_COMPUTE] = "compute",
};
static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 5f5b02b01ba0..f796c5e8e060 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -13,7 +13,7 @@
#include "gt/intel_engine_types.h"
-#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
+#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
struct drm_i915_private;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a6cf9716d6aa..3ed9021c615d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1059,6 +1059,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P)
#define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV)
#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG2)
+#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, INTEL_PONTEVECCHIO)
+
#define IS_DG2_G10(dev_priv) \
IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10)
#define IS_DG2_G11(dev_priv) \
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 38f7de778914..987bdeb090a5 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1037,7 +1037,8 @@ static const struct intel_device_info xehpsdv_info = {
BIT(RCS0) | BIT(BCS0) |
BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) |
BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3) |
- BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7),
+ BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7) |
+ BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3),
.require_force_probe = 1,
};
@@ -1056,7 +1057,8 @@ static const struct intel_device_info xehpsdv_info = {
.platform_engine_mask = \
BIT(RCS0) | BIT(BCS0) | \
BIT(VECS0) | BIT(VECS1) | \
- BIT(VCS0) | BIT(VCS2)
+ BIT(VCS0) | BIT(VCS2) | \
+ BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3)
__maybe_unused
static const struct intel_device_info dg2_info = {
@@ -1074,6 +1076,27 @@ static const struct intel_device_info ats_m_info = {
.require_force_probe = 1,
};
+/* XE_HP_FEATURES plus an explicit .dma_mask_size of 52 */
+#define XE_HPC_FEATURES \
+ XE_HP_FEATURES, \
+ .dma_mask_size = 52
+
+/*
+ * Ponte Vecchio device info: force-probe only, .display zero-initialised,
+ * .has_flat_ccs cleared, and an engine mask of BCS0, VCS0 and CCS0-CCS3
+ * (RCS0 is not in the mask). Marked __maybe_unused as nothing visible in
+ * this hunk references it.
+ */
+__maybe_unused
+static const struct intel_device_info pvc_info = {
+ XE_HPC_FEATURES,
+ XE_HPM_FEATURES,
+ DGFX_FEATURES,
+ .graphics.rel = 60,
+ .media.rel = 60,
+ PLATFORM(INTEL_PONTEVECCHIO),
+ .display = { 0 },
+ .has_flat_ccs = 0,
+ .platform_engine_mask =
+ BIT(BCS0) |
+ BIT(VCS0) |
+ BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3),
+ .require_force_probe = 1,
+};
+
#undef PLATFORM
/*
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 162e8d83691b..4f6db539571a 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -548,7 +548,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
int err;
if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_GPU_ONLY))
- return IO_ERR_PTR(-EINVAL);
+ return IOMEM_ERR_PTR(-EINVAL);
if (!i915_gem_object_is_lmem(vma->obj)) {
if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
@@ -601,7 +601,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
err_unpin:
__i915_vma_unpin(vma);
err:
- return IO_ERR_PTR(err);
+ return IOMEM_ERR_PTR(err);
}
void i915_vma_flush_writes(struct i915_vma *vma)
@@ -1565,9 +1565,7 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
if (ww)
return __i915_ggtt_pin(vma, ww, align, flags);
-#ifdef CONFIG_LOCKDEP
- WARN_ON(dma_resv_held(vma->obj->base.resv));
-#endif
+ lockdep_assert_not_held(&vma->obj->base.resv->lock.base);
for_i915_gem_ww(&_ww, err, true) {
err = i915_gem_object_lock(vma->obj, &_ww);
@@ -1615,17 +1613,17 @@ void i915_vma_close(struct i915_vma *vma)
static void __i915_vma_remove_closed(struct i915_vma *vma)
{
- struct intel_gt *gt = vma->vm->gt;
-
- spin_lock_irq(&gt->closed_lock);
list_del_init(&vma->closed_link);
- spin_unlock_irq(&gt->closed_lock);
}
void i915_vma_reopen(struct i915_vma *vma)
{
+ struct intel_gt *gt = vma->vm->gt;
+
+ /*
+  * Take gt->closed_lock around both the closed check and the list
+  * removal so the two happen in a single critical section.
+  */
+ spin_lock_irq(&gt->closed_lock);
if (i915_vma_is_closed(vma))
__i915_vma_remove_closed(vma);
+ spin_unlock_irq(&gt->closed_lock);
}
static void force_unbind(struct i915_vma *vma)
@@ -1641,6 +1639,7 @@ static void force_unbind(struct i915_vma *vma)
static void release_references(struct i915_vma *vma, bool vm_ddestroy)
{
struct drm_i915_gem_object *obj = vma->obj;
+ struct intel_gt *gt = vma->vm->gt;
GEM_BUG_ON(i915_vma_is_active(vma));
@@ -1651,7 +1650,9 @@ static void release_references(struct i915_vma *vma, bool vm_ddestroy)
spin_unlock(&obj->vma.lock);
+ spin_lock_irq(&gt->closed_lock);
__i915_vma_remove_closed(vma);
+ spin_unlock_irq(&gt->closed_lock);
if (vm_ddestroy)
i915_vm_resv_put(vma->vm);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 6034991d89fe..88ca0bd9c900 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -317,7 +317,6 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node,
* Returns a valid iomapped pointer or ERR_PTR.
*/
void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
-#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
/**
* i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 41a5b98d1342..b0e62a411534 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -72,6 +72,7 @@ static const char * const platform_names[] = {
PLATFORM_NAME(ALDERLAKE_P),
PLATFORM_NAME(XEHPSDV),
PLATFORM_NAME(DG2),
+ PLATFORM_NAME(PONTEVECCHIO),
};
#undef PLATFORM_NAME
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 576d15a04c9e..ec0b8095e7fa 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -88,6 +88,7 @@ enum intel_platform {
INTEL_ALDERLAKE_P,
INTEL_XEHPSDV,
INTEL_DG2,
+ INTEL_PONTEVECCHIO,
INTEL_MAX_PLATFORMS
};
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 35ca528803fd..a2def7b27009 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -154,21 +154,71 @@ enum i915_mocs_table_index {
I915_MOCS_CACHED,
};
-/*
+/**
+ * enum drm_i915_gem_engine_class - uapi engine type enumeration
+ *
* Different engines serve different roles, and there may be more than one
- * engine serving each role. enum drm_i915_gem_engine_class provides a
- * classification of the role of the engine, which may be used when requesting
- * operations to be performed on a certain subset of engines, or for providing
- * information about that group.
+ * engine serving each role. This enum provides a classification of the role
+ * of the engine, which may be used when requesting operations to be performed
+ * on a certain subset of engines, or for providing information about that
+ * group.
*/
enum drm_i915_gem_engine_class {
+ /**
+ * @I915_ENGINE_CLASS_RENDER:
+ *
+ * Render engines support instructions used for 3D, Compute (GPGPU),
+ * and programmable media workloads. These instructions fetch data and
+ * dispatch individual work items to threads that operate in parallel.
+ * The threads run small programs (called "kernels" or "shaders") on
+ * the GPU's execution units (EUs).
+ */
I915_ENGINE_CLASS_RENDER = 0,
+
+ /**
+ * @I915_ENGINE_CLASS_COPY:
+ *
+ * Copy engines (also referred to as "blitters") support instructions
+ * that move blocks of data from one location in memory to another,
+ * or that fill a specified location of memory with fixed data.
+ * Copy engines can perform pre-defined logical or bitwise operations
+ * on the source, destination, or pattern data.
+ */
I915_ENGINE_CLASS_COPY = 1,
+
+ /**
+ * @I915_ENGINE_CLASS_VIDEO:
+ *
+ * Video engines (also referred to as "bit stream decode" (BSD) or
+ * "vdbox") support instructions that perform fixed-function media
+ * decode and encode.
+ */
I915_ENGINE_CLASS_VIDEO = 2,
+
+ /**
+ * @I915_ENGINE_CLASS_VIDEO_ENHANCE:
+ *
+ * Video enhancement engines (also referred to as "vebox") support
+ * instructions related to image enhancement.
+ */
I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
- /* should be kept compact */
+ /**
+ * @I915_ENGINE_CLASS_COMPUTE:
+ *
+ * Compute engines support a subset of the instructions available
+ * on render engines: compute engines support Compute (GPGPU) and
+ * programmable media workloads, but do not support the 3D pipeline.
+ */
+ I915_ENGINE_CLASS_COMPUTE = 4,
+
+ /* Values in this enum should be kept compact. */
+ /**
+ * @I915_ENGINE_CLASS_INVALID:
+ *
+ * Placeholder value to represent an invalid engine class assignment.
+ */
I915_ENGINE_CLASS_INVALID = -1
};