diff options
author | Kenneth Graunke <kenneth@whitecape.org> | 2021-10-20 15:59:40 -0700 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2021-10-28 19:45:56 +0000 |
commit | 148ea65ee1554dcdb5aad37184b56cad2d3e622a (patch) | |
tree | 40773c01ef5d0a7b86632f2f7e890ae348fa306a /src/mesa/drivers/dri | |
parent | 0a64007676ff5cdfa46fc25a479d1b4b48ca9b31 (diff) |
i965: Port STATE_BASE_ADDRESS to genxml and fix bugs
This largely copies crocus's code for this (but with Gfx9+ handling).
This version also fixes missing MOCS settings on several platforms,
which we hadn't noticed were missing.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13480>
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_misc_state.c | 196 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_blorp_exec.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_state_upload.c | 158 |
6 files changed, 161 insertions, 200 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index bf00fe01607..579dcecad05 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -710,6 +710,7 @@ struct brw_context void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, struct brw_bo *bo, uint32_t offset, uint64_t imm); + void (*emit_state_base_address)(struct brw_context *brw); } vtbl; struct brw_bufmgr *bufmgr; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 5b4ad94c605..53eac1c178f 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -726,199 +726,3 @@ brw_upload_invariant_state(struct brw_context *brw) ADVANCE_BATCH(); } } - -/** - * Define the base addresses which some state is referenced from. - * - * This allows us to avoid having to emit relocations for the objects, - * and is actually required for binding table pointers on gfx6. - * - * Surface state base address covers binding table pointers and - * surface state objects, but not the surfaces that the surface state - * objects point to. - */ -void -brw_upload_state_base_address(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (brw->batch.state_base_address_emitted) - return; - - /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of - * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be - * programmed prior to STATE_BASE_ADDRESS. - * - * However, given that the instruction SBA (general state base - * address) on this chipset is always set to 0 across X and GL, - * maybe this isn't required for us in particular. - */ - - uint32_t mocs = brw_mocs(&brw->isl_dev, NULL); - - if (devinfo->ver >= 6) { - const unsigned dc_flush = - devinfo->ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0; - - /* Emit a render target cache flush. - * - * This isn't documented anywhere in the PRM. However, it seems to be - * necessary prior to changing the surface state base adress. We've - * seen issues in Vulkan where we get GPU hangs when using multi-level - * command buffers which clear depth, reset state base address, and then - * go render stuff. - * - * Normally, in GL, we would trust the kernel to do sufficient stalls - * and flushes prior to executing our batch. However, it doesn't seem - * as if the kernel's flushing is always sufficient and we don't want to - * rely on it. - * - * We make this an end-of-pipe sync instead of a normal flush because we - * do not know the current status of the GPU. On Haswell at least, - * having a fast-clear operation in flight at the same time as a normal - * rendering operation can cause hangs. Since the kernel's flushing is - * insufficient, we need to ensure that any rendering operations from - * other processes are definitely complete before we try to do our own - * rendering. It's a bit of a big hammer but it appears to work. - */ - brw_emit_end_of_pipe_sync(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - dc_flush); - } - - if (devinfo->ver >= 8) { - /* STATE_BASE_ADDRESS has issues with 48-bit address spaces. If the - * address + size as seen by STATE_BASE_ADDRESS overflows 48 bits, - * the GPU appears to treat all accesses to the buffer as being out - * of bounds and returns zero. To work around this, we pin all SBAs - * to the bottom 4GB. - */ - int pkt_len = devinfo->ver >= 10 ? 22 : (devinfo->ver >= 9 ? 19 : 16); - - BEGIN_BATCH(pkt_len); - OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (pkt_len - 2)); - /* General state base address: stateless DP read/write requests */ - OUT_BATCH(mocs << 4 | 1); - OUT_BATCH(0); - OUT_BATCH(mocs << 16); - /* Surface state base address: */ - OUT_RELOC64(brw->batch.state.bo, RELOC_32BIT, mocs << 4 | 1); - /* Dynamic state base address: */ - OUT_RELOC64(brw->batch.state.bo, RELOC_32BIT, mocs << 4 | 1); - /* Indirect object base address: MEDIA_OBJECT data */ - OUT_BATCH(mocs << 4 | 1); - OUT_BATCH(0); - /* Instruction base address: shader kernels (incl. SIP) */ - OUT_RELOC64(brw->cache.bo, RELOC_32BIT, mocs << 4 | 1); - /* General state buffer size */ - OUT_BATCH(0xfffff001); - /* Dynamic state buffer size */ - OUT_BATCH(ALIGN(MAX_STATE_SIZE, 4096) | 1); - /* Indirect object upper bound */ - OUT_BATCH(0xfffff001); - /* Instruction access upper bound */ - OUT_BATCH(ALIGN(brw->cache.bo->size, 4096) | 1); - if (devinfo->ver >= 9) { - OUT_BATCH(1); - OUT_BATCH(0); - OUT_BATCH(0); - } - if (devinfo->ver >= 10) { - OUT_BATCH(1); - OUT_BATCH(0); - OUT_BATCH(0); - } - ADVANCE_BATCH(); - } else if (devinfo->ver >= 6) { - BEGIN_BATCH(10); - OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); - OUT_BATCH(mocs << 8 | /* General State Memory Object Control State */ - mocs << 4 | /* Stateless Data Port Access Memory Object Control State */ - 1); /* General State Base Address Modify Enable */ - /* Surface state base address: - * BINDING_TABLE_STATE - * SURFACE_STATE - */ - OUT_RELOC(brw->batch.state.bo, 0, 1); - /* Dynamic state base address: - * SAMPLER_STATE - * SAMPLER_BORDER_COLOR_STATE - * CLIP, SF, WM/CC viewport state - * COLOR_CALC_STATE - * DEPTH_STENCIL_STATE - * BLEND_STATE - * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset - * Disable is clear, which we rely on) - */ - OUT_RELOC(brw->batch.state.bo, 0, 1); - - OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */ - - /* Instruction base address: shader kernels (incl. SIP) */ - OUT_RELOC(brw->cache.bo, 0, 1); - - OUT_BATCH(1); /* General state upper bound */ - /* Dynamic state upper bound. Although the documentation says that - * programming it to zero will cause it to be ignored, that is a lie. - * If this isn't programmed to a real bound, the sampler border color - * pointer is rejected, causing border color to mysteriously fail. - */ - OUT_BATCH(0xfffff001); - OUT_BATCH(1); /* Indirect object upper bound */ - OUT_BATCH(1); /* Instruction access upper bound */ - ADVANCE_BATCH(); - } else if (devinfo->ver == 5) { - BEGIN_BATCH(8); - OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); - OUT_BATCH(1); /* General state base address */ - OUT_RELOC(brw->batch.state.bo, 0, 1); /* Surface state base address */ - OUT_BATCH(1); /* Indirect object base address */ - OUT_RELOC(brw->cache.bo, 0, 1); /* Instruction base address */ - OUT_BATCH(0xfffff001); /* General state upper bound */ - OUT_BATCH(1); /* Indirect object upper bound */ - OUT_BATCH(1); /* Instruction access upper bound */ - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(6); - OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); - OUT_BATCH(1); /* General state base address */ - OUT_RELOC(brw->batch.state.bo, 0, 1); /* Surface state base address */ - OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* General state upper bound */ - OUT_BATCH(1); /* Indirect object upper bound */ - ADVANCE_BATCH(); - } - - if (devinfo->ver >= 6) { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - } - - /* According to section 3.6.1 of VOL1 of the 965 PRM, - * STATE_BASE_ADDRESS updates require a reissue of: - * - * 3DSTATE_PIPELINE_POINTERS - * 3DSTATE_BINDING_TABLE_POINTERS - * MEDIA_STATE_POINTERS - * - * and this continues through Ironlake. The Sandy Bridge PRM, vol - * 1 part 1 says that the folowing packets must be reissued: - * - * 3DSTATE_CC_POINTERS - * 3DSTATE_BINDING_TABLE_POINTERS - * 3DSTATE_SAMPLER_STATE_POINTERS - * 3DSTATE_VIEWPORT_STATE_POINTERS - * MEDIA_STATE_POINTERS - * - * Those are always reissued following SBA updates anyway (new - * batch time), except in the case of the program cache BO - * changing. Having a separate state flag makes the sequence more - * obvious. - */ - - brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS; - brw->batch.state_base_address_emitted = true; -} diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index a16d8c82f16..17d9e543842 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -142,8 +142,6 @@ void brw_upload_invariant_state(struct brw_context *brw); uint32_t brw_depthbuffer_format(struct brw_context *brw); -void brw_upload_state_base_address(struct brw_context *brw); - /* gfx8_depth_state.c */ void gfx8_write_pma_stall_bits(struct brw_context *brw, uint32_t pma_stall_bits); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 8b2c1dedaf5..ca0118ed217 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -683,7 +683,7 @@ brw_upload_pipeline_state(struct brw_context *brw, brw_upload_programs(brw, pipeline); merge_ctx_state(brw, &state); - brw_upload_state_base_address(brw); + brw->vtbl.emit_state_base_address(brw); const struct brw_tracked_state *atoms = brw_get_pipeline_atoms(brw, pipeline); diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index 83a93d0e65f..aed53d939d2 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -337,7 +337,7 @@ retry: brw_emit_post_sync_nonzero_flush(brw); #endif - brw_upload_state_base_address(brw); + brw->vtbl.emit_state_base_address(brw); #if GFX_VER >= 8 gfx7_l3_state.emit(brw); diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index d514dfaa246..7fed8356ef0 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -98,6 +98,162 @@ emit_lri(struct brw_context *brw, uint32_t reg, uint32_t imm) #endif /** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations for the objects, + * and is actually required for binding table pointers on Gfx6. + * + * Surface state base address covers binding table pointers and surface state + * objects, but not the surfaces that the surface state objects point to. + */ +static void +genX(emit_state_base_address)(struct brw_context *brw) +{ + if (brw->batch.state_base_address_emitted) + return; + + /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of + * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be + * programmed prior to STATE_BASE_ADDRESS. + * + * However, given that the instruction SBA (general state base + * address) on this chipset is always set to 0 across X and GL, + * maybe this isn't required for us in particular. + */ + + UNUSED uint32_t mocs = brw_mocs(&brw->isl_dev, NULL); + + /* Flush before updating STATE_BASE_ADDRESS */ +#if GFX_VER >= 6 + const unsigned dc_flush = + GFX_VER >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0; + + /* Emit a render target cache flush. + * + * This isn't documented anywhere in the PRM. However, it seems to be + * necessary prior to changing the surface state base adress. We've + * seen issues in Vulkan where we get GPU hangs when using multi-level + * command buffers which clear depth, reset state base address, and then + * go render stuff. + * + * Normally, in GL, we would trust the kernel to do sufficient stalls + * and flushes prior to executing our batch. However, it doesn't seem + * as if the kernel's flushing is always sufficient and we don't want to + * rely on it. + * + * We make this an end-of-pipe sync instead of a normal flush because we + * do not know the current status of the GPU. On Haswell at least, + * having a fast-clear operation in flight at the same time as a normal + * rendering operation can cause hangs. Since the kernel's flushing is + * insufficient, we need to ensure that any rendering operations from + * other processes are definitely complete before we try to do our own + * rendering. It's a bit of a big hammer but it appears to work. + */ + brw_emit_end_of_pipe_sync(brw, + PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + dc_flush); +#endif + + brw_batch_emit(brw, GENX(STATE_BASE_ADDRESS), sba) { + /* Set base addresses */ + sba.GeneralStateBaseAddressModifyEnable = true; + +#if GFX_VER >= 6 + sba.DynamicStateBaseAddressModifyEnable = true; + sba.DynamicStateBaseAddress = ro_bo(brw->batch.state.bo, 0); +#endif + + sba.SurfaceStateBaseAddressModifyEnable = true; + sba.SurfaceStateBaseAddress = ro_bo(brw->batch.state.bo, 0); + + sba.IndirectObjectBaseAddressModifyEnable = true; + +#if GFX_VER >= 5 + sba.InstructionBaseAddressModifyEnable = true; + sba.InstructionBaseAddress = ro_bo(brw->cache.bo, 0); +#endif + + /* Set buffer sizes on Gfx8+ or upper bounds on Gfx4-7 */ +#if GFX_VER >= 8 + sba.GeneralStateBufferSize = 0xfffff; + sba.IndirectObjectBufferSize = 0xfffff; + sba.InstructionBufferSize = 0xfffff; + sba.DynamicStateBufferSize = MAX_STATE_SIZE; + + sba.GeneralStateBufferSizeModifyEnable = true; + sba.DynamicStateBufferSizeModifyEnable = true; + sba.IndirectObjectBufferSizeModifyEnable = true; + sba.InstructionBuffersizeModifyEnable = true; +#else + sba.GeneralStateAccessUpperBoundModifyEnable = true; + sba.IndirectObjectAccessUpperBoundModifyEnable = true; + +#if GFX_VER >= 5 + sba.InstructionAccessUpperBoundModifyEnable = true; +#endif + +#if GFX_VER >= 6 + /* Dynamic state upper bound. Although the documentation says that + * programming it to zero will cause it to be ignored, that is a lie. + * If this isn't programmed to a real bound, the sampler border color + * pointer is rejected, causing border color to mysteriously fail. + */ + sba.DynamicStateAccessUpperBound = ro_bo(NULL, 0xfffff000); + sba.DynamicStateAccessUpperBoundModifyEnable = true; +#else + /* Same idea but using General State Base Address on Gfx4-5 */ + sba.GeneralStateAccessUpperBound = ro_bo(NULL, 0xfffff000); +#endif +#endif + +#if GFX_VER >= 6 + /* The hardware appears to pay attention to the MOCS fields even + * if you don't set the "Address Modify Enable" bit for the base. + */ + sba.GeneralStateMOCS = mocs; + sba.StatelessDataPortAccessMOCS = mocs; + sba.DynamicStateMOCS = mocs; + sba.IndirectObjectMOCS = mocs; + sba.InstructionMOCS = mocs; + sba.SurfaceStateMOCS = mocs; +#endif + } + + /* Flush after updating STATE_BASE_ADDRESS */ +#if GFX_VER >= 6 + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_INSTRUCTION_INVALIDATE | + PIPE_CONTROL_STATE_CACHE_INVALIDATE | + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); +#endif + + /* According to section 3.6.1 of VOL1 of the 965 PRM, + * STATE_BASE_ADDRESS updates require a reissue of: + * + * 3DSTATE_PIPELINE_POINTERS + * 3DSTATE_BINDING_TABLE_POINTERS + * MEDIA_STATE_POINTERS + * + * and this continues through Ironlake. The Sandy Bridge PRM, vol + * 1 part 1 says that the folowing packets must be reissued: + * + * 3DSTATE_CC_POINTERS + * 3DSTATE_BINDING_TABLE_POINTERS + * 3DSTATE_SAMPLER_STATE_POINTERS + * 3DSTATE_VIEWPORT_STATE_POINTERS + * MEDIA_STATE_POINTERS + * + * Those are always reissued following SBA updates anyway (new + * batch time), except in the case of the program cache BO + * changing. Having a separate state flag makes the sequence more + * obvious. + */ + brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS; + brw->batch.state_base_address_emitted = true; +} + +/** * Polygon stipple packet */ static void @@ -5918,5 +6074,7 @@ genX(init_atoms)(struct brw_context *brw) brw->vtbl.emit_compute_walker = genX(emit_gpgpu_walker); #endif + brw->vtbl.emit_state_base_address = genX(emit_state_base_address); + assert(brw->screen->devinfo.verx10 == GFX_VERx10); } |