author     Kenneth Graunke <kenneth@whitecape.org>    2021-10-20 15:59:40 -0700
committer  Marge Bot <emma+marge@anholt.net>          2021-10-28 19:45:56 +0000
commit     148ea65ee1554dcdb5aad37184b56cad2d3e622a
tree       40773c01ef5d0a7b86632f2f7e890ae348fa306a    /src/mesa/drivers
parent     0a64007676ff5cdfa46fc25a479d1b4b48ca9b31
i965: Port STATE_BASE_ADDRESS to genxml and fix bugs

This largely copies crocus's code for this (but with Gfx9+ handling).
This version also fixes missing MOCS settings on several platforms,
which we hadn't noticed were missing.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13480>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--  src/mesa/drivers/dri/i965/brw_context.h         1
-rw-r--r--  src/mesa/drivers/dri/i965/brw_misc_state.c    196
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state.h           2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_upload.c    2
-rw-r--r--  src/mesa/drivers/dri/i965/genX_blorp_exec.c     2
-rw-r--r--  src/mesa/drivers/dri/i965/genX_state_upload.c 158
6 files changed, 161 insertions, 200 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index bf00fe01607..579dcecad05 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -710,6 +710,7 @@ struct brw_context
void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
+ void (*emit_state_base_address)(struct brw_context *brw);
} vtbl;
struct brw_bufmgr *bufmgr;
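
This hunk adds the hook to the driver's per-generation vtbl; genX(init_atoms) fills it in (see the last hunk below), and the brw_state_upload.c and genX_blorp_exec.c hunks switch their call sites to dispatch through it. As a minimal sketch of that dispatch pattern — structures heavily simplified, and any names not shown in the diff are purely illustrative:

#include <stdio.h>

struct brw_context;                      /* forward declaration */

struct brw_vtbl {
   /* Per-generation hook, installed once when the context is set up. */
   void (*emit_state_base_address)(struct brw_context *brw);
};

struct brw_context {
   struct brw_vtbl vtbl;
   int gfx_ver;                          /* stand-in for devinfo->ver */
};

/* What one generation's genX(emit_state_base_address) might reduce to. */
static void
gfx9_emit_state_base_address(struct brw_context *brw)
{
   printf("emitting STATE_BASE_ADDRESS for Gfx%d\n", brw->gfx_ver);
}

/* Rough analogue of genX(init_atoms) wiring up the hook. */
static void
gfx9_init_atoms(struct brw_context *brw)
{
   brw->vtbl.emit_state_base_address = gfx9_emit_state_base_address;
}

int
main(void)
{
   struct brw_context brw = { .gfx_ver = 9 };
   gfx9_init_atoms(&brw);

   /* Call sites (brw_upload_pipeline_state, the BLORP path) now go
    * through the vtbl instead of a shared brw_upload_state_base_address(). */
   brw.vtbl.emit_state_base_address(&brw);
   return 0;
}

The point of the indirection is that genX_state_upload.c is compiled once per hardware generation, so each build installs its own generation-specific emitter.
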
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 5b4ad94c605..53eac1c178f 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -726,199 +726,3 @@ brw_upload_invariant_state(struct brw_context *brw)
ADVANCE_BATCH();
}
}
-
-/**
- * Define the base addresses which some state is referenced from.
- *
- * This allows us to avoid having to emit relocations for the objects,
- * and is actually required for binding table pointers on gfx6.
- *
- * Surface state base address covers binding table pointers and
- * surface state objects, but not the surfaces that the surface state
- * objects point to.
- */
-void
-brw_upload_state_base_address(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (brw->batch.state_base_address_emitted)
- return;
-
- /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
- * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
- * programmed prior to STATE_BASE_ADDRESS.
- *
- * However, given that the instruction SBA (general state base
- * address) on this chipset is always set to 0 across X and GL,
- * maybe this isn't required for us in particular.
- */
-
- uint32_t mocs = brw_mocs(&brw->isl_dev, NULL);
-
- if (devinfo->ver >= 6) {
- const unsigned dc_flush =
- devinfo->ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
-
- /* Emit a render target cache flush.
- *
- * This isn't documented anywhere in the PRM. However, it seems to be
- * necessary prior to changing the surface state base adress. We've
- * seen issues in Vulkan where we get GPU hangs when using multi-level
- * command buffers which clear depth, reset state base address, and then
- * go render stuff.
- *
- * Normally, in GL, we would trust the kernel to do sufficient stalls
- * and flushes prior to executing our batch. However, it doesn't seem
- * as if the kernel's flushing is always sufficient and we don't want to
- * rely on it.
- *
- * We make this an end-of-pipe sync instead of a normal flush because we
- * do not know the current status of the GPU. On Haswell at least,
- * having a fast-clear operation in flight at the same time as a normal
- * rendering operation can cause hangs. Since the kernel's flushing is
- * insufficient, we need to ensure that any rendering operations from
- * other processes are definitely complete before we try to do our own
- * rendering. It's a bit of a big hammer but it appears to work.
- */
- brw_emit_end_of_pipe_sync(brw,
- PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- dc_flush);
- }
-
- if (devinfo->ver >= 8) {
- /* STATE_BASE_ADDRESS has issues with 48-bit address spaces. If the
- * address + size as seen by STATE_BASE_ADDRESS overflows 48 bits,
- * the GPU appears to treat all accesses to the buffer as being out
- * of bounds and returns zero. To work around this, we pin all SBAs
- * to the bottom 4GB.
- */
- int pkt_len = devinfo->ver >= 10 ? 22 : (devinfo->ver >= 9 ? 19 : 16);
-
- BEGIN_BATCH(pkt_len);
- OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (pkt_len - 2));
- /* General state base address: stateless DP read/write requests */
- OUT_BATCH(mocs << 4 | 1);
- OUT_BATCH(0);
- OUT_BATCH(mocs << 16);
- /* Surface state base address: */
- OUT_RELOC64(brw->batch.state.bo, RELOC_32BIT, mocs << 4 | 1);
- /* Dynamic state base address: */
- OUT_RELOC64(brw->batch.state.bo, RELOC_32BIT, mocs << 4 | 1);
- /* Indirect object base address: MEDIA_OBJECT data */
- OUT_BATCH(mocs << 4 | 1);
- OUT_BATCH(0);
- /* Instruction base address: shader kernels (incl. SIP) */
- OUT_RELOC64(brw->cache.bo, RELOC_32BIT, mocs << 4 | 1);
- /* General state buffer size */
- OUT_BATCH(0xfffff001);
- /* Dynamic state buffer size */
- OUT_BATCH(ALIGN(MAX_STATE_SIZE, 4096) | 1);
- /* Indirect object upper bound */
- OUT_BATCH(0xfffff001);
- /* Instruction access upper bound */
- OUT_BATCH(ALIGN(brw->cache.bo->size, 4096) | 1);
- if (devinfo->ver >= 9) {
- OUT_BATCH(1);
- OUT_BATCH(0);
- OUT_BATCH(0);
- }
- if (devinfo->ver >= 10) {
- OUT_BATCH(1);
- OUT_BATCH(0);
- OUT_BATCH(0);
- }
- ADVANCE_BATCH();
- } else if (devinfo->ver >= 6) {
- BEGIN_BATCH(10);
- OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
- OUT_BATCH(mocs << 8 | /* General State Memory Object Control State */
- mocs << 4 | /* Stateless Data Port Access Memory Object Control State */
- 1); /* General State Base Address Modify Enable */
- /* Surface state base address:
- * BINDING_TABLE_STATE
- * SURFACE_STATE
- */
- OUT_RELOC(brw->batch.state.bo, 0, 1);
- /* Dynamic state base address:
- * SAMPLER_STATE
- * SAMPLER_BORDER_COLOR_STATE
- * CLIP, SF, WM/CC viewport state
- * COLOR_CALC_STATE
- * DEPTH_STENCIL_STATE
- * BLEND_STATE
- * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
- * Disable is clear, which we rely on)
- */
- OUT_RELOC(brw->batch.state.bo, 0, 1);
-
- OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
-
- /* Instruction base address: shader kernels (incl. SIP) */
- OUT_RELOC(brw->cache.bo, 0, 1);
-
- OUT_BATCH(1); /* General state upper bound */
- /* Dynamic state upper bound. Although the documentation says that
- * programming it to zero will cause it to be ignored, that is a lie.
- * If this isn't programmed to a real bound, the sampler border color
- * pointer is rejected, causing border color to mysteriously fail.
- */
- OUT_BATCH(0xfffff001);
- OUT_BATCH(1); /* Indirect object upper bound */
- OUT_BATCH(1); /* Instruction access upper bound */
- ADVANCE_BATCH();
- } else if (devinfo->ver == 5) {
- BEGIN_BATCH(8);
- OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
- OUT_BATCH(1); /* General state base address */
- OUT_RELOC(brw->batch.state.bo, 0, 1); /* Surface state base address */
- OUT_BATCH(1); /* Indirect object base address */
- OUT_RELOC(brw->cache.bo, 0, 1); /* Instruction base address */
- OUT_BATCH(0xfffff001); /* General state upper bound */
- OUT_BATCH(1); /* Indirect object upper bound */
- OUT_BATCH(1); /* Instruction access upper bound */
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(6);
- OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
- OUT_BATCH(1); /* General state base address */
- OUT_RELOC(brw->batch.state.bo, 0, 1); /* Surface state base address */
- OUT_BATCH(1); /* Indirect object base address */
- OUT_BATCH(1); /* General state upper bound */
- OUT_BATCH(1); /* Indirect object upper bound */
- ADVANCE_BATCH();
- }
-
- if (devinfo->ver >= 6) {
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_INSTRUCTION_INVALIDATE |
- PIPE_CONTROL_STATE_CACHE_INVALIDATE |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
- }
-
- /* According to section 3.6.1 of VOL1 of the 965 PRM,
- * STATE_BASE_ADDRESS updates require a reissue of:
- *
- * 3DSTATE_PIPELINE_POINTERS
- * 3DSTATE_BINDING_TABLE_POINTERS
- * MEDIA_STATE_POINTERS
- *
- * and this continues through Ironlake. The Sandy Bridge PRM, vol
- * 1 part 1 says that the folowing packets must be reissued:
- *
- * 3DSTATE_CC_POINTERS
- * 3DSTATE_BINDING_TABLE_POINTERS
- * 3DSTATE_SAMPLER_STATE_POINTERS
- * 3DSTATE_VIEWPORT_STATE_POINTERS
- * MEDIA_STATE_POINTERS
- *
- * Those are always reissued following SBA updates anyway (new
- * batch time), except in the case of the program cache BO
- * changing. Having a separate state flag makes the sequence more
- * obvious.
- */
-
- brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
- brw->batch.state_base_address_emitted = true;
-}
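
In the removed path above, every DWord is packed by hand: bit 0 of each base-address DWord is its Modify Enable flag and the MOCS value is shifted into a field starting at bit 4 (hence the repeated mocs << 4 | 1), while the upper-bound and size DWords carry a 4 KB-aligned limit in the high bits plus the enable bit (0xfffff001 is (0xfffff << 12) | 1). A small sketch of that packing, with the field positions read off the code above rather than re-checked against the PRM:

#include <stdint.h>
#include <stdio.h>

/* Base-address DWord: MOCS in a field starting at bit 4, Modify Enable
 * in bit 0 (positions taken from the removed code above). */
static uint32_t
pack_base_address_dw(uint32_t mocs, int modify_enable)
{
   return (mocs << 4) | (modify_enable ? 1u : 0u);
}

/* Upper-bound / size DWord: a 4 KB-aligned limit with the enable bit,
 * i.e. the ALIGN(size, 4096) | 1 pattern from the removed code. */
static uint32_t
pack_upper_bound_dw(uint64_t size)
{
   uint64_t aligned = (size + 4095) & ~(uint64_t)4095;
   return (uint32_t)aligned | 1u;
}

int
main(void)
{
   uint32_t mocs = 0x78;   /* hypothetical MOCS value, for illustration only */
   printf("base dword:  0x%08x\n", pack_base_address_dw(mocs, 1));
   printf("bound dword: 0x%08x\n", pack_upper_bound_dw(0xfffff000ull));
   return 0;
}
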
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index a16d8c82f16..17d9e543842 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -142,8 +142,6 @@ void brw_upload_invariant_state(struct brw_context *brw);
uint32_t
brw_depthbuffer_format(struct brw_context *brw);
-void brw_upload_state_base_address(struct brw_context *brw);
-
/* gfx8_depth_state.c */
void gfx8_write_pma_stall_bits(struct brw_context *brw,
uint32_t pma_stall_bits);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 8b2c1dedaf5..ca0118ed217 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -683,7 +683,7 @@ brw_upload_pipeline_state(struct brw_context *brw,
brw_upload_programs(brw, pipeline);
merge_ctx_state(brw, &state);
- brw_upload_state_base_address(brw);
+ brw->vtbl.emit_state_base_address(brw);
const struct brw_tracked_state *atoms =
brw_get_pipeline_atoms(brw, pipeline);
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 83a93d0e65f..aed53d939d2 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -337,7 +337,7 @@ retry:
brw_emit_post_sync_nonzero_flush(brw);
#endif
- brw_upload_state_base_address(brw);
+ brw->vtbl.emit_state_base_address(brw);
#if GFX_VER >= 8
gfx7_l3_state.emit(brw);
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index d514dfaa246..7fed8356ef0 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -98,6 +98,162 @@ emit_lri(struct brw_context *brw, uint32_t reg, uint32_t imm)
#endif
/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations for the objects,
+ * and is actually required for binding table pointers on Gfx6.
+ *
+ * Surface state base address covers binding table pointers and surface state
+ * objects, but not the surfaces that the surface state objects point to.
+ */
+static void
+genX(emit_state_base_address)(struct brw_context *brw)
+{
+ if (brw->batch.state_base_address_emitted)
+ return;
+
+ /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
+ * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
+ * programmed prior to STATE_BASE_ADDRESS.
+ *
+ * However, given that the instruction SBA (general state base
+ * address) on this chipset is always set to 0 across X and GL,
+ * maybe this isn't required for us in particular.
+ */
+
+ UNUSED uint32_t mocs = brw_mocs(&brw->isl_dev, NULL);
+
+ /* Flush before updating STATE_BASE_ADDRESS */
+#if GFX_VER >= 6
+ const unsigned dc_flush =
+ GFX_VER >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
+
+ /* Emit a render target cache flush.
+ *
+ * This isn't documented anywhere in the PRM. However, it seems to be
+ * necessary prior to changing the surface state base address. We've
+ * seen issues in Vulkan where we get GPU hangs when using multi-level
+ * command buffers which clear depth, reset state base address, and then
+ * go render stuff.
+ *
+ * Normally, in GL, we would trust the kernel to do sufficient stalls
+ * and flushes prior to executing our batch. However, it doesn't seem
+ * as if the kernel's flushing is always sufficient and we don't want to
+ * rely on it.
+ *
+ * We make this an end-of-pipe sync instead of a normal flush because we
+ * do not know the current status of the GPU. On Haswell at least,
+ * having a fast-clear operation in flight at the same time as a normal
+ * rendering operation can cause hangs. Since the kernel's flushing is
+ * insufficient, we need to ensure that any rendering operations from
+ * other processes are definitely complete before we try to do our own
+ * rendering. It's a bit of a big hammer but it appears to work.
+ */
+ brw_emit_end_of_pipe_sync(brw,
+ PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ dc_flush);
+#endif
+
+ brw_batch_emit(brw, GENX(STATE_BASE_ADDRESS), sba) {
+ /* Set base addresses */
+ sba.GeneralStateBaseAddressModifyEnable = true;
+
+#if GFX_VER >= 6
+ sba.DynamicStateBaseAddressModifyEnable = true;
+ sba.DynamicStateBaseAddress = ro_bo(brw->batch.state.bo, 0);
+#endif
+
+ sba.SurfaceStateBaseAddressModifyEnable = true;
+ sba.SurfaceStateBaseAddress = ro_bo(brw->batch.state.bo, 0);
+
+ sba.IndirectObjectBaseAddressModifyEnable = true;
+
+#if GFX_VER >= 5
+ sba.InstructionBaseAddressModifyEnable = true;
+ sba.InstructionBaseAddress = ro_bo(brw->cache.bo, 0);
+#endif
+
+ /* Set buffer sizes on Gfx8+ or upper bounds on Gfx4-7 */
+#if GFX_VER >= 8
+ sba.GeneralStateBufferSize = 0xfffff;
+ sba.IndirectObjectBufferSize = 0xfffff;
+ sba.InstructionBufferSize = 0xfffff;
+ sba.DynamicStateBufferSize = MAX_STATE_SIZE;
+
+ sba.GeneralStateBufferSizeModifyEnable = true;
+ sba.DynamicStateBufferSizeModifyEnable = true;
+ sba.IndirectObjectBufferSizeModifyEnable = true;
+ sba.InstructionBuffersizeModifyEnable = true;
+#else
+ sba.GeneralStateAccessUpperBoundModifyEnable = true;
+ sba.IndirectObjectAccessUpperBoundModifyEnable = true;
+
+#if GFX_VER >= 5
+ sba.InstructionAccessUpperBoundModifyEnable = true;
+#endif
+
+#if GFX_VER >= 6
+ /* Dynamic state upper bound. Although the documentation says that
+ * programming it to zero will cause it to be ignored, that is a lie.
+ * If this isn't programmed to a real bound, the sampler border color
+ * pointer is rejected, causing border color to mysteriously fail.
+ */
+ sba.DynamicStateAccessUpperBound = ro_bo(NULL, 0xfffff000);
+ sba.DynamicStateAccessUpperBoundModifyEnable = true;
+#else
+ /* Same idea but using General State Base Address on Gfx4-5 */
+ sba.GeneralStateAccessUpperBound = ro_bo(NULL, 0xfffff000);
+#endif
+#endif
+
+#if GFX_VER >= 6
+ /* The hardware appears to pay attention to the MOCS fields even
+ * if you don't set the "Address Modify Enable" bit for the base.
+ */
+ sba.GeneralStateMOCS = mocs;
+ sba.StatelessDataPortAccessMOCS = mocs;
+ sba.DynamicStateMOCS = mocs;
+ sba.IndirectObjectMOCS = mocs;
+ sba.InstructionMOCS = mocs;
+ sba.SurfaceStateMOCS = mocs;
+#endif
+ }
+
+ /* Flush after updating STATE_BASE_ADDRESS */
+#if GFX_VER >= 6
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+ PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+#endif
+
+ /* According to section 3.6.1 of VOL1 of the 965 PRM,
+ * STATE_BASE_ADDRESS updates require a reissue of:
+ *
+ * 3DSTATE_PIPELINE_POINTERS
+ * 3DSTATE_BINDING_TABLE_POINTERS
+ * MEDIA_STATE_POINTERS
+ *
+ * and this continues through Ironlake. The Sandy Bridge PRM, vol
+ * 1 part 1 says that the following packets must be reissued:
+ *
+ * 3DSTATE_CC_POINTERS
+ * 3DSTATE_BINDING_TABLE_POINTERS
+ * 3DSTATE_SAMPLER_STATE_POINTERS
+ * 3DSTATE_VIEWPORT_STATE_POINTERS
+ * MEDIA_STATE_POINTERS
+ *
+ * Those are always reissued following SBA updates anyway (new
+ * batch time), except in the case of the program cache BO
+ * changing. Having a separate state flag makes the sequence more
+ * obvious.
+ */
+ brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
+ brw->batch.state_base_address_emitted = true;
+}
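
The genxml replacement above drops all of that manual DWord math: brw_batch_emit(brw, GENX(STATE_BASE_ADDRESS), sba) { ... } declares a C struct for the packet, the block body assigns named fields such as SurfaceStateBaseAddress and GeneralStateMOCS, and the struct is packed into batch DWords when the block exits. A hedged sketch of that fill-then-pack control flow — the real macro and the generated pack helpers are considerably more involved:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEMO_CMD_length 4                 /* stand-in for GENX(..._length) */

/* Stand-in for the genxml-generated command struct. */
struct demo_cmd {
   uint32_t DWordLength;
   uint32_t GeneralStateMOCS;
   int      GeneralStateBaseAddressModifyEnable;
};

/* Stand-in for the generated pack helper: named fields -> raw DWords. */
static void
demo_cmd_pack(uint32_t *dw, const struct demo_cmd *c)
{
   memset(dw, 0, DEMO_CMD_length * sizeof(*dw));
   dw[0] = c->DWordLength;
   dw[1] = (c->GeneralStateMOCS << 4) |
           (c->GeneralStateBaseAddressModifyEnable ? 1u : 0u);
}

static uint32_t batch[DEMO_CMD_length];

/* for-loop trick: run the body once to fill the struct, pack on the way out. */
#define demo_batch_emit(cmd, name)                                      \
   for (struct cmd name = { .DWordLength = DEMO_CMD_length - 2 },       \
           *_run = &name;                                               \
        _run != NULL;                                                   \
        demo_cmd_pack(batch, &name), _run = NULL)

int
main(void)
{
   demo_batch_emit(demo_cmd, sba) {
      sba.GeneralStateBaseAddressModifyEnable = 1;
      sba.GeneralStateMOCS = 0x78;        /* hypothetical MOCS value */
   }
   printf("dw1 = 0x%08x\n", batch[1]);
   return 0;
}

Because the field layout lives in the generated pack function rather than in ad-hoc shifts at each call site, omissions like the missing MOCS settings in the hand-rolled version are much easier to spot.
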
+
+/**
* Polygon stipple packet
*/
static void
@@ -5918,5 +6074,7 @@ genX(init_atoms)(struct brw_context *brw)
brw->vtbl.emit_compute_walker = genX(emit_gpgpu_walker);
#endif
+ brw->vtbl.emit_state_base_address = genX(emit_state_base_address);
+
assert(brw->screen->devinfo.verx10 == GFX_VERx10);
}
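
For completeness, the genX() spelling in the hunk above is what lets one source file serve every generation: the build compiles genX_state_upload.c once per supported GFX_VERx10, genX(name) expands to a generation-prefixed symbol, and the GFX_VER checks select the fields each generation programs. A rough sketch of that convention, with the macro spelling simplified relative to Mesa's real genxml headers:

#include <stdio.h>

/* The build compiles the file once per generation, e.g. -DGFX_VERx10=90
 * for Gfx9; the default here exists only so this sketch compiles standalone. */
#ifndef GFX_VERx10
#define GFX_VERx10 90
#endif
#define GFX_VER (GFX_VERx10 / 10)

/* Simplified stand-in for the genX() macro from Mesa's genxml headers. */
#if GFX_VERx10 == 80
#  define genX(name) gfx8_##name
#elif GFX_VERx10 == 90
#  define genX(name) gfx9_##name
#elif GFX_VERx10 == 110
#  define genX(name) gfx11_##name
#else
#  error "GFX_VERx10 not handled by this sketch"
#endif

/* Each per-generation build produces its own symbol:
 * gfx8_emit_state_base_address, gfx9_emit_state_base_address, ... */
static void
genX(emit_state_base_address)(void)
{
#if GFX_VER >= 9
   printf("Gfx%d: would also program the Gfx9+-only fields\n", GFX_VER);
#else
   printf("Gfx%d: base fields only\n", GFX_VER);
#endif
}

int
main(void)
{
   /* In the driver, genX(init_atoms) stores this generation's function in
    * the vtbl; here we simply call it directly. */
   genX(emit_state_base_address)();
   return 0;
}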