summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp109
1 files changed, 72 insertions, 37 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 1842d56786d..b89c6721ea0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4198,9 +4198,6 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
coordinate_done = true;
break;
case SHADER_OPCODE_TG4_OFFSET:
- /* gather4_po_c should have been lowered in SIMD16 mode. */
- assert(bld.dispatch_width() == 8 || shadow_c.file == BAD_FILE);
-
/* More crazy intermixing */
for (unsigned i = 0; i < 2; i++) /* u, v */
bld.MOV(sources[length++], offset(coordinate, bld, i));
@@ -4687,6 +4684,67 @@ get_fpu_lowered_simd_width(const struct brw_device_info *devinfo,
}
/**
+ * Get the maximum allowed SIMD width for instruction \p inst accounting for
+ * various payload size restrictions that apply to sampler message
+ * instructions.
+ *
+ * This is only intended to provide a maximum theoretical bound for the
+ * execution size of the message based on the number of argument components
+ * alone, which in most cases will determine whether the SIMD8 or SIMD16
+ * variant of the message can be used, though some messages may have
+ * additional restrictions not accounted for here (e.g. pre-ILK hardware uses
+ * the message length to determine the exact SIMD width and argument count,
+ * which makes a number of sampler message combinations impossible to
+ * represent).
+ */
+static unsigned
+get_sampler_lowered_simd_width(const struct brw_device_info *devinfo,
+ const fs_inst *inst)
+{
+ /* Calculate the number of coordinate components that have to be present
+ * assuming that additional arguments follow the texel coordinates in the
+ * message payload. On IVB+ there is no need for padding, on ILK-SNB we
+ * need to pad to four or three components depending on the message,
+ * pre-ILK we need to pad to at most three components.
+ */
+ const unsigned req_coord_components =
+ (devinfo->gen >= 7 ||
+ !inst->components_read(TEX_LOGICAL_SRC_COORDINATE)) ? 0 :
+ (devinfo->gen >= 5 && inst->opcode != SHADER_OPCODE_TXF_LOGICAL &&
+ inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL) ? 4 :
+ 3;
+
+ /* On Gen9+ the LOD argument is for free if we're able to use the LZ
+ * variant of the TXL or TXF message.
+ */
+ const bool implicit_lod = devinfo->gen >= 9 &&
+ (inst->opcode == SHADER_OPCODE_TXL ||
+ inst->opcode == SHADER_OPCODE_TXF) &&
+ inst->src[TEX_LOGICAL_SRC_LOD].is_zero();
+
+ /* Calculate the total number of argument components that need to be passed
+ * to the sampler unit.
+ */
+ const unsigned num_payload_components =
+ MAX2(inst->components_read(TEX_LOGICAL_SRC_COORDINATE),
+ req_coord_components) +
+ inst->components_read(TEX_LOGICAL_SRC_SHADOW_C) +
+ (implicit_lod ? 0 : inst->components_read(TEX_LOGICAL_SRC_LOD)) +
+ inst->components_read(TEX_LOGICAL_SRC_LOD2) +
+ inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) +
+ (inst->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ?
+ inst->components_read(TEX_LOGICAL_SRC_OFFSET_VALUE) : 0) +
+ inst->components_read(TEX_LOGICAL_SRC_MCS);
+
+ /* SIMD16 messages with more than five arguments exceed the maximum message
+ * size supported by the sampler, regardless of whether a header is
+ * provided or not.
+ */
+ return MIN2(inst->exec_size,
+ num_payload_components > MAX_SAMPLER_MESSAGE_SIZE / 2 ? 8 : 16);
+}
+
+/**
* Get the closest native SIMD width supported by the hardware for instruction
* \p inst. The instruction will be left untouched by
* fs_visitor::lower_simd_width() if the returned value is equal to the
@@ -4861,31 +4919,24 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
case SHADER_OPCODE_LOD_LOGICAL:
case SHADER_OPCODE_TG4_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
- return MIN2(16, inst->exec_size);
+ case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
+ case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
+ return get_sampler_lowered_simd_width(devinfo, inst);
case SHADER_OPCODE_TXD_LOGICAL:
/* TXD is unsupported in SIMD16 mode. */
return 8;
- case SHADER_OPCODE_TG4_OFFSET_LOGICAL: {
- /* gather4_po_c is unsupported in SIMD16 mode. */
- const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
- return (shadow_c.file != BAD_FILE ? 8 : MIN2(16, inst->exec_size));
- }
case SHADER_OPCODE_TXL_LOGICAL:
- case FS_OPCODE_TXB_LOGICAL: {
- /* Gen4 doesn't have SIMD8 non-shadow-compare bias/LOD instructions, and
- * Gen4-6 can't support TXL and TXB with shadow comparison in SIMD16
- * mode because the message exceeds the maximum length of 11.
+ case FS_OPCODE_TXB_LOGICAL:
+ /* Only one execution size is representable pre-ILK depending on whether
+ * the shadow reference argument is present.
*/
- const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
- if (devinfo->gen == 4 && shadow_c.file == BAD_FILE)
- return 16;
- else if (devinfo->gen < 7 && shadow_c.file != BAD_FILE)
- return 8;
+ if (devinfo->gen == 4)
+ return inst->src[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE ? 16 : 8;
else
- return MIN2(16, inst->exec_size);
- }
+ return get_sampler_lowered_simd_width(devinfo, inst);
+
case SHADER_OPCODE_TXF_LOGICAL:
case SHADER_OPCODE_TXS_LOGICAL:
/* Gen4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD
@@ -4894,23 +4945,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
if (devinfo->gen == 4)
return 16;
else
- return MIN2(16, inst->exec_size);
-
- case SHADER_OPCODE_TXF_CMS_W_LOGICAL: {
- /* This opcode can take up to 6 arguments which means that in some
- * circumstances it can end up with a message that is too long in SIMD16
- * mode.
- */
- const unsigned coord_components =
- inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
- /* First three arguments are the sample index and the two arguments for
- * the MCS data.
- */
- if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE)
- return 8;
- else
- return MIN2(16, inst->exec_size);
- }
+ return get_sampler_lowered_simd_width(devinfo, inst);
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: