summary | refs | log | tree | commit | diff
path: root/src/intel/compiler/brw_fs_generator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/intel/compiler/brw_fs_generator.cpp')
-rw-r--r-- src/intel/compiler/brw_fs_generator.cpp 139
1 files changed, 103 insertions, 36 deletions
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 10a12eafc76..7d891c21a5b 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -221,41 +221,109 @@ public:
int ip;
};
-bool
-fs_generator::patch_discard_jumps_to_fb_writes()
+void
+fs_generator::patch_discard_jumps_to_landing_pad(const fs_inst *inst)
{
- if (devinfo->gen < 6 || this->discard_halt_patches.is_empty())
- return false;
+ const int scale = brw_jump_scale(p->devinfo);
- int scale = brw_jump_scale(p->devinfo);
+ p->exit_insn_offset = p->next_insn_offset;
- /* There is a somewhat strange undocumented requirement of using
- * HALT, according to the simulator. If some channel has HALTed to
- * a particular UIP, then by the end of the program, every channel
- * must have HALTed to that UIP. Furthermore, the tracking is a
- * stack, so you can't do the final halt of a UIP after starting
- * halting to a new UIP.
- *
- * Symptoms of not emitting this instruction on actual hardware
- * included GPU hangs and sparkly rendering on the piglit discard
- * tests.
- */
- brw_inst *last_halt = gen6_HALT(p);
- brw_inst_set_uip(p->devinfo, last_halt, 1 * scale);
- brw_inst_set_jip(p->devinfo, last_halt, 1 * scale);
+ if (devinfo->gen >= 8) {
+ brw_push_insn_state(p);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- int ip = p->nr_insn;
+ const brw_reg_type t = (inst->exec_size > 16 ? BRW_REGISTER_TYPE_UD :
+ BRW_REGISTER_TYPE_UW);
+
+ brw_MOV(p, retype(brw_flag_reg(0, inst->flag_subreg), t),
+ retype(brw_mask_reg(0), t));
+
+ brw_pop_insn_state(p);
+
+ } else if (devinfo->gen >= 6) {
+ const brw_reg_type t = (inst->exec_size > 16 ? BRW_REGISTER_TYPE_UD :
+ BRW_REGISTER_TYPE_UW);
+ brw_inst *zero = brw_MOV(p, retype(brw_flag_reg(0, inst->flag_subreg), t),
+ brw_imm_uw(0));
+ brw_inst_set_exec_size(devinfo, zero, BRW_EXECUTE_1);
+ brw_inst_set_mask_control(devinfo, zero, BRW_MASK_DISABLE);
+
+ brw_inst *mov = brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW),
+ brw_imm_uw(0));
+ brw_inst_set_cond_modifier(devinfo, mov, BRW_CONDITIONAL_Z);
+ brw_inst_set_flag_subreg_nr(devinfo, mov, inst->flag_subreg);
+ }
+ if (devinfo->gen >= 7) {
+ /* If some channel has BRCed to a particular UIP, then by the
+ * end of the program, every channel must have BRCed to that
+ * UIP. Furthermore, the tracking is a stack, so you can't do
+ * the final branch to a UIP after starting branching to a new
+ * UIP.
+ */
+ brw_inst *last_converging = gen7_BRC(p);
+ brw_inst_set_uip(p->devinfo, last_converging, scale);
+ }
+
+ const int ip = p->nr_insn;
foreach_in_list(ip_record, patch_ip, &discard_halt_patches) {
brw_inst *patch = &p->store[patch_ip->ip];
- assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT);
+ assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT ||
+ brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_BRC);
+
/* HALT takes a half-instruction distance from the pre-incremented IP. */
- brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale);
+ if (devinfo->gen >= 6)
+ brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale);
+ else
+ brw_inst_set_gen4_jump_count(devinfo, patch,
+ (ip - patch_ip->ip) * scale);
+ }
+
+ this->discard_halt_patches.make_empty();
+
+ if (devinfo->gen < 6) {
+ brw_inst *fetch = brw_AND(p, brw_flag_reg(0, inst->flag_subreg),
+ brw_mask_reg(0 /* AMASK */),
+ retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW));
+ brw_inst_set_exec_size(devinfo, fetch, BRW_EXECUTE_1);
+ brw_inst_set_mask_control(devinfo, fetch, BRW_MASK_DISABLE);
+ brw_inst_set_qtr_control(devinfo, fetch, BRW_COMPRESSION_NONE);
+ brw_inst_set_thread_control(devinfo, fetch, BRW_THREAD_SWITCH);
}
- this->discard_halt_patches.make_empty();
- return true;
+ if (devinfo->gen == 4 && !devinfo->is_g4x) {
+ /* Workaround for the following:
+ *
+ * [DevBW, DevCL] Erratum: The subfields in mask stack register are
+ * reset to zero during graphics reset, however, they are not
+ * initialized at thread dispatch. These subfields will retain the
+ * values from the previous thread. Software should make sure the mask
+ * stack is empty (reset to zero) before terminating the thread. In case
+ * that this is not practical, software may have to reset the mask stack
+ * at the beginning of each kernel, which will impact the performance.
+ *
+ * Luckily we can rely on:
+ *
+ * [DevBW, DevCL] This register access restriction is not applicable,
+ * hardware does ensure execution pipeline coherency, when a mask stack
+ * register is used as an explicit source and/or destination.
+ */
+ brw_push_insn_state(p);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_set_default_exec_size(p, BRW_EXECUTE_2);
+ brw_MOV(p, vec2(brw_mask_stack_depth_reg(0)), brw_imm_uw(0));
+
+ brw_set_default_exec_size(p, BRW_EXECUTE_16);
+ /* Reset the if stack. */
+ brw_MOV(p, retype(brw_mask_stack_reg(0), BRW_REGISTER_TYPE_UW),
+ brw_imm_uw(0));
+
+ brw_pop_insn_state(p);
+ }
}
void
@@ -1332,14 +1400,17 @@ fs_generator::generate_ddy(const fs_inst *inst,
void
fs_generator::generate_discard_jump(fs_inst *)
{
- assert(devinfo->gen >= 6);
-
- /* This HALT will be patched up at FB write time to point UIP at the end of
- * the program, and at brw_uip_jip() JIP will be set to the end of the
- * current block (or the program).
+ /* On Gen6+ this HALT (a BRC on Gen7+) will be patched up to point UIP at
+ * the placeholder HALT instruction in the discard landing pad, and at
+ * brw_set_uip_jip() JIP will be set to the end of the current block (or the
+ * beginning of the discard landing pad).
*/
this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
- gen6_HALT(p);
+
+ if (devinfo->gen >= 7)
+ gen7_BRC(p);
+ else
+ brw_HALT(p);
}
void
@@ -2198,15 +2269,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
break;
- case FS_OPCODE_PLACEHOLDER_HALT:
+ case FS_OPCODE_DISCARD_LANDING_PAD:
/* This is the place where the final HALT needs to be inserted if
* we've emitted any discards. If not, this will emit no code.
*/
- if (!patch_discard_jumps_to_fb_writes()) {
- if (unlikely(debug_flag)) {
- disasm_info->use_tail = true;
- }
- }
+ patch_discard_jumps_to_landing_pad(inst);
break;
case FS_OPCODE_INTERPOLATE_AT_SAMPLE: