summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jason Ekstrand <jason.ekstrand@intel.com> 2018-10-30 15:47:39 -0500
committer Jason Ekstrand <jason@jlekstrand.net> 2019-01-29 18:43:55 +0000
commit 8514eba693c9daa07284a248e1c4e5d825152c1c (patch)
tree da0d6f216c0097e97aeb8e4dfca6fb999c691f79
parent f547cebbe062b094077ed32c8d557c7162c1c4fb (diff)
intel/fs: Use SHADER_OPCODE_SEND for texturing on gen7+
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-rw-r--r-- src/intel/compiler/brw_fs.cpp 138
-rw-r--r-- src/intel/compiler/brw_fs.h 2
-rw-r--r-- src/intel/compiler/brw_fs_generator.cpp 162
-rw-r--r-- src/intel/compiler/brw_schedule_instructions.cpp 17
4 files changed, 177 insertions, 142 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 35e78eed7f8..d9c339b1f08 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -4551,6 +4551,66 @@ is_high_sampler(const struct gen_device_info *devinfo, const fs_reg &sampler)
return sampler.file != IMM || sampler.ud >= 16;
}
+static unsigned
+sampler_msg_type(const gen_device_info *devinfo,
+ opcode opcode, bool shadow_compare)
+{
+ assert(devinfo->gen >= 5);
+ switch (opcode) {
+ case SHADER_OPCODE_TEX:
+ return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE :
+ GEN5_SAMPLER_MESSAGE_SAMPLE;
+ case FS_OPCODE_TXB:
+ return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE :
+ GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
+ case SHADER_OPCODE_TXL:
+ return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE :
+ GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
+ case SHADER_OPCODE_TXL_LZ:
+ return shadow_compare ? GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ :
+ GEN9_SAMPLER_MESSAGE_SAMPLE_LZ;
+ case SHADER_OPCODE_TXS:
+ case SHADER_OPCODE_IMAGE_SIZE:
+ return GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
+ case SHADER_OPCODE_TXD:
+ assert(!shadow_compare || devinfo->gen >= 8 || devinfo->is_haswell);
+ return shadow_compare ? HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE :
+ GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
+ case SHADER_OPCODE_TXF:
+ return GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
+ case SHADER_OPCODE_TXF_LZ:
+ assert(devinfo->gen >= 9);
+ return GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ;
+ case SHADER_OPCODE_TXF_CMS_W:
+ assert(devinfo->gen >= 9);
+ return GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
+ case SHADER_OPCODE_TXF_CMS:
+ return devinfo->gen >= 7 ? GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS :
+ GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
+ case SHADER_OPCODE_TXF_UMS:
+ assert(devinfo->gen >= 7);
+ return GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
+ case SHADER_OPCODE_TXF_MCS:
+ assert(devinfo->gen >= 7);
+ return GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
+ case SHADER_OPCODE_LOD:
+ return GEN5_SAMPLER_MESSAGE_LOD;
+ case SHADER_OPCODE_TG4:
+ assert(devinfo->gen >= 7);
+ return shadow_compare ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C :
+ GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
+ break;
+ case SHADER_OPCODE_TG4_OFFSET:
+ assert(devinfo->gen >= 7);
+ return shadow_compare ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C :
+ GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
+ case SHADER_OPCODE_SAMPLEINFO:
+ return GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
+ default:
+ unreachable("not reached");
+ }
+}
+
static void
lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg &coordinate,
@@ -4566,6 +4626,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
unsigned grad_components)
{
const gen_device_info *devinfo = bld.shader->devinfo;
+ const brw_stage_prog_data *prog_data = bld.shader->stage_prog_data;
unsigned reg_width = bld.dispatch_width() / 8;
unsigned header_size = 0, length = 0;
fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
@@ -4792,14 +4853,81 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
/* Generate the SEND. */
- inst->opcode = op;
- inst->src[0] = src_payload;
- inst->src[1] = surface;
- inst->src[2] = sampler;
- inst->resize_sources(3);
+ inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = mlen;
inst->header_size = header_size;
+ const unsigned msg_type =
+ sampler_msg_type(devinfo, op, inst->shadow_compare);
+ const unsigned simd_mode =
+ inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
+ BRW_SAMPLER_SIMD_MODE_SIMD16;
+
+ uint32_t base_binding_table_index;
+ switch (op) {
+ case SHADER_OPCODE_TG4:
+ case SHADER_OPCODE_TG4_OFFSET:
+ base_binding_table_index = prog_data->binding_table.gather_texture_start;
+ break;
+ case SHADER_OPCODE_IMAGE_SIZE:
+ base_binding_table_index = prog_data->binding_table.image_start;
+ break;
+ default:
+ base_binding_table_index = prog_data->binding_table.texture_start;
+ break;
+ }
+
+ inst->sfid = BRW_SFID_SAMPLER;
+ if (surface.file == IMM && sampler.file == IMM) {
+ inst->desc = brw_sampler_desc(devinfo,
+ surface.ud + base_binding_table_index,
+ sampler.ud % 16,
+ msg_type,
+ simd_mode,
+ 0 /* return_format unused on gen7+ */);
+ inst->src[0] = brw_imm_ud(0);
+ } else {
+ /* Immediate portion of the descriptor */
+ inst->desc = brw_sampler_desc(devinfo,
+ 0, /* surface */
+ 0, /* sampler */
+ msg_type,
+ simd_mode,
+ 0 /* return_format unused on gen7+ */);
+ const fs_builder ubld = bld.group(1, 0).exec_all();
+ fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ if (surface.equals(sampler)) {
+ /* This case is common in GL */
+ ubld.MUL(desc, surface, brw_imm_ud(0x101));
+ } else {
+ if (sampler.file == IMM) {
+ ubld.OR(desc, surface, brw_imm_ud(sampler.ud << 8));
+ } else {
+ ubld.SHL(desc, sampler, brw_imm_ud(8));
+ ubld.OR(desc, desc, surface);
+ }
+ }
+ if (base_binding_table_index)
+ ubld.ADD(desc, desc, brw_imm_ud(base_binding_table_index));
+ ubld.AND(desc, desc, brw_imm_ud(0xfff));
+
+ inst->src[0] = component(desc, 0);
+ }
+ inst->src[1] = brw_imm_ud(0); /* ex_desc */
+
+ inst->src[2] = src_payload;
+ inst->resize_sources(3);
+
+ if (inst->eot) {
+ /* EOT sampler messages don't make sense to split because it would
+ * involve ending half of the thread early.
+ */
+ assert(inst->group == 0);
+ /* We need to use SENDC for EOT sampler messages */
+ inst->check_tdr = true;
+ inst->send_has_side_effects = true;
+ }
+
/* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
}
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 093a5751e2d..4e913eb8d80 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -421,7 +421,7 @@ private:
void generate_barrier(fs_inst *inst, struct brw_reg src);
bool generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);
- void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+ void generate_tex(fs_inst *inst, struct brw_reg dst,
struct brw_reg surface_index,
struct brw_reg sampler_index);
void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 0c9feb63a8c..35762b43615 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -965,10 +965,11 @@ fs_generator::generate_get_buffer_size(fs_inst *inst,
}
void
-fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst,
struct brw_reg surface_index,
struct brw_reg sampler_index)
{
+ assert(devinfo->gen < 7);
assert(inst->size_written % REG_SIZE == 0);
int msg_type = -1;
uint32_t simd_mode;
@@ -1037,71 +1038,26 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
}
break;
- case SHADER_OPCODE_TXL_LZ:
- assert(devinfo->gen >= 9);
- if (inst->shadow_compare) {
- msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ;
- } else {
- msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LZ;
- }
- break;
case SHADER_OPCODE_TXS:
- case SHADER_OPCODE_IMAGE_SIZE:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
break;
case SHADER_OPCODE_TXD:
- if (inst->shadow_compare) {
- /* Gen7.5+. Otherwise, lowered in NIR */
- assert(devinfo->gen >= 8 || devinfo->is_haswell);
- msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
- } else {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
- }
+ assert(!inst->shadow_compare);
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
break;
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
- case SHADER_OPCODE_TXF_LZ:
- assert(devinfo->gen >= 9);
- msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ;
- break;
- case SHADER_OPCODE_TXF_CMS_W:
- assert(devinfo->gen >= 9);
- msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
- break;
case SHADER_OPCODE_TXF_CMS:
- if (devinfo->gen >= 7)
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
- else
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
- break;
- case SHADER_OPCODE_TXF_UMS:
- assert(devinfo->gen >= 7);
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
- break;
- case SHADER_OPCODE_TXF_MCS:
- assert(devinfo->gen >= 7);
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_LOD:
msg_type = GEN5_SAMPLER_MESSAGE_LOD;
break;
case SHADER_OPCODE_TG4:
- if (inst->shadow_compare) {
- assert(devinfo->gen >= 7);
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
- } else {
- assert(devinfo->gen >= 6);
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
- }
- break;
- case SHADER_OPCODE_TG4_OFFSET:
- assert(devinfo->gen >= 7);
- if (inst->shadow_compare) {
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
- } else {
- msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
- }
+ assert(devinfo->gen == 6);
+ assert(!inst->shadow_compare);
+ msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
break;
case SHADER_OPCODE_SAMPLEINFO:
msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
@@ -1180,16 +1136,14 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
dst = vec16(dst);
}
- assert(devinfo->gen < 7 || inst->header_size == 0 ||
- src.file == BRW_GENERAL_REGISTER_FILE);
-
assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
/* Load the message header if present. If there's a texture offset,
* we need to set it up explicitly and load the offset bitfield.
* Otherwise, we can use an implied move from g0 to the first message reg.
*/
- if (inst->header_size != 0 && devinfo->gen < 7) {
+ struct brw_reg src = brw_null_reg();
+ if (inst->header_size != 0) {
if (devinfo->gen < 6 && !inst->offset) {
/* Set up an implied move from g0 to the MRF. */
src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
@@ -1218,83 +1172,28 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
uint32_t base_binding_table_index;
switch (inst->opcode) {
case SHADER_OPCODE_TG4:
- case SHADER_OPCODE_TG4_OFFSET:
base_binding_table_index = prog_data->binding_table.gather_texture_start;
break;
- case SHADER_OPCODE_IMAGE_SIZE:
- base_binding_table_index = prog_data->binding_table.image_start;
- break;
default:
base_binding_table_index = prog_data->binding_table.texture_start;
break;
}
- if (surface_index.file == BRW_IMMEDIATE_VALUE &&
- sampler_index.file == BRW_IMMEDIATE_VALUE) {
- uint32_t surface = surface_index.ud;
- uint32_t sampler = sampler_index.ud;
-
- brw_SAMPLE(p,
- retype(dst, BRW_REGISTER_TYPE_UW),
- inst->base_mrf,
- src,
- surface + base_binding_table_index,
- sampler % 16,
- msg_type,
- inst->size_written / REG_SIZE,
- inst->mlen,
- inst->header_size != 0,
- simd_mode,
- return_format);
- } else {
- /* Non-const sampler index */
-
- struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
- struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD));
- struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
-
- brw_push_insn_state(p);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_set_default_access_mode(p, BRW_ALIGN_1);
- brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ assert(surface_index.file == BRW_IMMEDIATE_VALUE);
+ assert(sampler_index.file == BRW_IMMEDIATE_VALUE);
- if (brw_regs_equal(&surface_reg, &sampler_reg)) {
- brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
- } else {
- if (sampler_reg.file == BRW_IMMEDIATE_VALUE) {
- brw_OR(p, addr, surface_reg, brw_imm_ud(sampler_reg.ud << 8));
- } else {
- brw_SHL(p, addr, sampler_reg, brw_imm_ud(8));
- brw_OR(p, addr, addr, surface_reg);
- }
- }
- if (base_binding_table_index)
- brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
- brw_AND(p, addr, addr, brw_imm_ud(0xfff));
-
- brw_pop_insn_state(p);
-
- /* dst = send(offset, a0.0 | <descriptor>) */
- brw_send_indirect_message(
- p, BRW_SFID_SAMPLER, dst, src, addr,
- brw_message_desc(devinfo, inst->mlen, inst->size_written / REG_SIZE,
- inst->header_size) |
- brw_sampler_desc(devinfo,
- 0 /* surface */,
- 0 /* sampler */,
- msg_type,
- simd_mode,
- return_format));
-
- /* visitor knows more than we do about the surface limit required,
- * so has already done marking.
- */
- }
-
- if (is_combined_send) {
- brw_inst_set_eot(p->devinfo, brw_last_inst, true);
- brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
- }
+ brw_SAMPLE(p,
+ retype(dst, BRW_REGISTER_TYPE_UW),
+ inst->base_mrf,
+ src,
+ surface_index.ud + base_binding_table_index,
+ sampler_index.ud % 16,
+ msg_type,
+ inst->size_written / REG_SIZE,
+ inst->mlen,
+ inst->header_size != 0,
+ simd_mode,
+ return_format);
}
@@ -2170,23 +2069,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
- case SHADER_OPCODE_TXF_LZ:
case SHADER_OPCODE_TXF_CMS:
- case SHADER_OPCODE_TXF_CMS_W:
- case SHADER_OPCODE_TXF_UMS:
- case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TXL:
- case SHADER_OPCODE_TXL_LZ:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_LOD:
case SHADER_OPCODE_TG4:
- case SHADER_OPCODE_TG4_OFFSET:
case SHADER_OPCODE_SAMPLEINFO:
- generate_tex(inst, dst, src[0], src[1], src[2]);
- break;
-
- case SHADER_OPCODE_IMAGE_SIZE:
- generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0));
+ assert(inst->src[0].file == BAD_FILE);
+ generate_tex(inst, dst, src[1], src[2]);
break;
case FS_OPCODE_DDX_COARSE:
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index f453bb42574..46d3111045d 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -416,6 +416,23 @@ schedule_node::set_latency_gen7(bool is_haswell)
case SHADER_OPCODE_SEND:
switch (inst->sfid) {
+ case BRW_SFID_SAMPLER: {
+ unsigned msg_type = (inst->desc >> 12) & 0x1f;
+ switch (msg_type) {
+ case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
+ case GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO:
+ /* See also SHADER_OPCODE_TXS */
+ latency = 100;
+ break;
+
+ default:
+ /* See also SHADER_OPCODE_TEX */
+ latency = 200;
+ break;
+ }
+ break;
+ }
+
case GEN6_SFID_DATAPORT_RENDER_CACHE:
switch ((inst->desc >> 14) & 0x1f) {
case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE: