diff options
Diffstat (limited to 'src/intel/compiler/brw_fs_generator.cpp')
-rw-r--r-- | src/intel/compiler/brw_fs_generator.cpp | 139 |
1 files changed, 103 insertions, 36 deletions
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 10a12eafc76..7d891c21a5b 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -221,41 +221,109 @@ public: int ip; }; -bool -fs_generator::patch_discard_jumps_to_fb_writes() +void +fs_generator::patch_discard_jumps_to_landing_pad(const fs_inst *inst) { - if (devinfo->gen < 6 || this->discard_halt_patches.is_empty()) - return false; + const int scale = brw_jump_scale(p->devinfo); - int scale = brw_jump_scale(p->devinfo); + p->exit_insn_offset = p->next_insn_offset; - /* There is a somewhat strange undocumented requirement of using - * HALT, according to the simulator. If some channel has HALTed to - * a particular UIP, then by the end of the program, every channel - * must have HALTed to that UIP. Furthermore, the tracking is a - * stack, so you can't do the final halt of a UIP after starting - * halting to a new UIP. - * - * Symptoms of not emitting this instruction on actual hardware - * included GPU hangs and sparkly rendering on the piglit discard - * tests. - */ - brw_inst *last_halt = gen6_HALT(p); - brw_inst_set_uip(p->devinfo, last_halt, 1 * scale); - brw_inst_set_jip(p->devinfo, last_halt, 1 * scale); + if (devinfo->gen >= 8) { + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); - int ip = p->nr_insn; + const brw_reg_type t = (inst->exec_size > 16 ? BRW_REGISTER_TYPE_UD : + BRW_REGISTER_TYPE_UW); + + brw_MOV(p, retype(brw_flag_reg(0, inst->flag_subreg), t), + retype(brw_mask_reg(0), t)); + + brw_pop_insn_state(p); + + } else if (devinfo->gen >= 6) { + const brw_reg_type t = (inst->exec_size > 16 ? BRW_REGISTER_TYPE_UD : + BRW_REGISTER_TYPE_UW); + brw_inst *zero = brw_MOV(p, retype(brw_flag_reg(0, inst->flag_subreg), t), + brw_imm_uw(0)); + brw_inst_set_exec_size(devinfo, zero, BRW_EXECUTE_1); + brw_inst_set_mask_control(devinfo, zero, BRW_MASK_DISABLE); + + brw_inst *mov = brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), + brw_imm_uw(0)); + brw_inst_set_cond_modifier(devinfo, mov, BRW_CONDITIONAL_Z); + brw_inst_set_flag_subreg_nr(devinfo, mov, inst->flag_subreg); + } + if (devinfo->gen >= 7) { + /* If some channel has BRCed to a particular UIP, then by the + * end of the program, every channel must have BRCed to that + * UIP. Furthermore, the tracking is a stack, so you can't do + * the final branch to a UIP after starting branching to a new + * UIP. + */ + brw_inst *last_converging = gen7_BRC(p); + brw_inst_set_uip(p->devinfo, last_converging, scale); + } + + const int ip = p->nr_insn; foreach_in_list(ip_record, patch_ip, &discard_halt_patches) { brw_inst *patch = &p->store[patch_ip->ip]; - assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT); + assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT || + brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_BRC); + /* HALT takes a half-instruction distance from the pre-incremented IP. */ - brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale); + if (devinfo->gen >= 6) + brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale); + else + brw_inst_set_gen4_jump_count(devinfo, patch, + (ip - patch_ip->ip) * scale); + } + + this->discard_halt_patches.make_empty(); + + if (devinfo->gen < 6) { + brw_inst *fetch = brw_AND(p, brw_flag_reg(0, inst->flag_subreg), + brw_mask_reg(0 /* AMASK */), + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW)); + brw_inst_set_exec_size(devinfo, fetch, BRW_EXECUTE_1); + brw_inst_set_mask_control(devinfo, fetch, BRW_MASK_DISABLE); + brw_inst_set_qtr_control(devinfo, fetch, BRW_COMPRESSION_NONE); + brw_inst_set_thread_control(devinfo, fetch, BRW_THREAD_SWITCH); } - this->discard_halt_patches.make_empty(); - return true; + if (devinfo->gen == 4 && !devinfo->is_g4x) { + /* Workaround for the following: + * + * [DevBW, DevCL] Erratum: The subfields in mask stack register are + * reset to zero during graphics reset, however, they are not + * initialized at thread dispatch. These subfields will retain the + * values from the previous thread. Software should make sure the mask + * stack is empty (reset to zero) before terminating the thread. In case + * that this is not practical, software may have to reset the mask stack + * at the beginning of each kernel, which will impact the performance. + * + * Luckily we can rely on: + * + * [DevBW, DevCL] This register access restriction is not applicable, + * hardware does ensure execution pipeline coherency, when a mask stack + * register is used as an explicit source and/or destination. + */ + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); + + brw_set_default_exec_size(p, BRW_EXECUTE_2); + brw_MOV(p, vec2(brw_mask_stack_depth_reg(0)), brw_imm_uw(0)); + + brw_set_default_exec_size(p, BRW_EXECUTE_16); + /* Reset the if stack. */ + brw_MOV(p, retype(brw_mask_stack_reg(0), BRW_REGISTER_TYPE_UW), + brw_imm_uw(0)); + + brw_pop_insn_state(p); + } } void @@ -1332,14 +1400,17 @@ fs_generator::generate_ddy(const fs_inst *inst, void fs_generator::generate_discard_jump(fs_inst *) { - assert(devinfo->gen >= 6); - - /* This HALT will be patched up at FB write time to point UIP at the end of - * the program, and at brw_uip_jip() JIP will be set to the end of the - * current block (or the program). + /* On Gen6+ This HALT will be patched up to point UIP at the placeholder + * HALT instruction in the discard landing pad, and at brw_set_uip_jip() + * JIP will be set to the end of the current block (or the beginning of the + * discard landing pad). */ this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn)); - gen6_HALT(p); + + if (devinfo->gen >= 7) + gen7_BRC(p); + else + brw_HALT(p); } void @@ -2198,15 +2269,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_pack_half_2x16_split(inst, dst, src[0], src[1]); break; - case FS_OPCODE_PLACEHOLDER_HALT: + case FS_OPCODE_DISCARD_LANDING_PAD: /* This is the place where the final HALT needs to be inserted if * we've emitted any discards. If not, this will emit no code. */ - if (!patch_discard_jumps_to_fb_writes()) { - if (unlikely(debug_flag)) { - disasm_info->use_tail = true; - } - } + patch_discard_jumps_to_landing_pad(inst); break; case FS_OPCODE_INTERPOLATE_AT_SAMPLE: |