author    | Francisco Jerez <currojerez@riseup.net> | 2014-10-28 15:59:34 +0200
committer | Francisco Jerez <currojerez@riseup.net> | 2014-10-30 16:39:53 +0200
commit    | a841b3c0cb61b11f993eaa52e75ae72daa4d5fa4 (patch)
tree      | 859e614042badaee0feeb510f6f3fbc089ccb421
parent    | d46cf50e4ce13b478544de223ec64302ab832d59 (diff)
i965: Unify most of the visiting code in the VEC4 and FS visitors. (branch: i965-unified-visitor)
The VEC4 and FS visitor classes are still huge, and there is still a lot
that could be unified, but most of what is left doesn't have much to do
with visiting.
36 files changed, 3760 insertions, 6993 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 9c006daa0e3..d61193f8970 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -70,6 +70,7 @@ i965_FILES = \
 	brw_gs_state.c \
 	brw_gs_surface_state.c \
 	brw_interpolation_map.c \
+	brw_ir_visitor.cpp \
 	brw_lower_texture_gradients.cpp \
 	brw_lower_unnormalized_offset.cpp \
 	brw_meta_updownsample.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index bb49a0ae955..7af127f5fee 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -143,7 +143,7 @@ bblock_t::combine_with(bblock_t *that)
 }
 
 void
-bblock_t::dump(backend_visitor *v) const
+bblock_t::dump(brw::base_visitor *v) const
 {
    int ip = this->start_ip;
    foreach_inst_in_block(backend_instruction, inst, this) {
@@ -422,7 +422,7 @@ cfg_t::make_block_array()
 }
 
 void
-cfg_t::dump(backend_visitor *v) const
+cfg_t::dump(brw::base_visitor *v) const
 {
    foreach_block (block, this) {
       fprintf(stderr, "START B%d", block->num);
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
index c06ed61a79f..6e27027e41a 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -31,6 +31,10 @@
 
 #include "brw_shader.h"
 
+#ifdef __cplusplus
+#include "brw_ir_visitor.h"
+#endif
+
 struct bblock_t;
 
 struct bblock_link {
@@ -60,7 +64,7 @@ struct bblock_t {
    bool is_successor_of(const bblock_t *block) const;
    bool can_combine_with(const bblock_t *that) const;
    void combine_with(bblock_t *that);
-   void dump(backend_visitor *v) const;
+   void dump(brw::base_visitor *v) const;
 
    backend_instruction *start();
    const backend_instruction *start() const;
@@ -204,7 +208,7 @@ struct cfg_t {
    void set_next_block(bblock_t **cur, bblock_t *block, int ip);
    void make_block_array();
 
-   void dump(backend_visitor *v) const;
+   void dump(brw::base_visitor *v) const;
 #endif
 
    void *mem_ctx;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 884e28bf8b4..4a1ffdc5b8a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -757,20 +757,20 @@ struct brw_tracked_state {
    void (*emit)( struct brw_context *brw );
 };
 
+enum shader_time_shader_entry {
+   ST_BASE,
+   ST_WRITTEN,
+   ST_RESET,
+   ST_SUM,
+   ST_NUM_ENTRIES
+};
+
 enum shader_time_shader_type {
    ST_NONE,
-   ST_VS,
-   ST_VS_WRITTEN,
-   ST_VS_RESET,
-   ST_GS,
-   ST_GS_WRITTEN,
-   ST_GS_RESET,
-   ST_FS8,
-   ST_FS8_WRITTEN,
-   ST_FS8_RESET,
-   ST_FS16,
-   ST_FS16_WRITTEN,
-   ST_FS16_RESET,
+   ST_VS = ST_NONE + ST_NUM_ENTRIES,
+   ST_GS = ST_VS + ST_NUM_ENTRIES,
+   ST_FS8 = ST_GS + ST_NUM_ENTRIES,
+   ST_FS16 = ST_FS8 + ST_NUM_ENTRIES
 };
 
 /* Flags for brw->state.cache.
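[Editor's note] The brw_context.h hunk above replaces the flat ST_* enumerator list with a base-plus-entry encoding: each shader type reserves ST_NUM_ENTRIES consecutive slots, and a record within a type is selected by adding a shader_time_shader_entry offset, so ST_FS8 + ST_WRITTEN takes over the role of the old ST_FS8_WRITTEN. A minimal, standalone illustration of how the two enums compose (the enum values are copied from the hunk; the main() is only a demonstration):

// Illustrative only: how the restructured shader-time enums compose.
#include <cassert>

enum shader_time_shader_entry { ST_BASE, ST_WRITTEN, ST_RESET, ST_SUM, ST_NUM_ENTRIES };

enum shader_time_shader_type {
   ST_NONE,
   ST_VS = ST_NONE + ST_NUM_ENTRIES,
   ST_GS = ST_VS + ST_NUM_ENTRIES,
   ST_FS8 = ST_GS + ST_NUM_ENTRIES,
   ST_FS16 = ST_FS8 + ST_NUM_ENTRIES
};

int main()
{
   // The "written" record of the SIMD8 fragment-shader type.
   int fs8_written = ST_FS8 + ST_WRITTEN;
   assert(fs8_written == 13); // 12 (ST_FS8) + 1 (ST_WRITTEN)
   return 0;
}

The payoff is that stage-independent code can compute any record's index arithmetically instead of switching over a per-stage enumerator.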
diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
index 4c9d7b95db8..be66c9efcb4 100644
--- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
+++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
@@ -36,7 +36,7 @@
  *   - if/else/endif
  */
 bool
-dead_control_flow_eliminate(backend_visitor *v)
+dead_control_flow_eliminate(brw::base_visitor *v)
 {
    bool progress = false;
 
diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.h b/src/mesa/drivers/dri/i965/brw_dead_control_flow.h
index 57a4dabc83c..1824fb98c33 100644
--- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.h
+++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.h
@@ -23,4 +23,4 @@
 
 #include "brw_shader.h"
 
-bool dead_control_flow_eliminate(backend_visitor *v);
+bool dead_control_flow_eliminate(brw::base_visitor *v);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 2943f042dd0..2cf2294960b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -124,7 +124,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
    case GRF:
    case HW_REG:
    case MRF:
-      this->regs_written = (dst.width * dst.stride * type_sz(dst.type) + 31) / 32;
+      this->regs_written = (MAX2(dst.width * dst.stride, 1) *
+                            type_sz(dst.type) + 31) / 32;
       break;
    case BAD_FILE:
       this->regs_written = 0;
@@ -228,7 +229,7 @@ fs_inst::resize_sources(uint8_t num_sources)
    if (this->sources != num_sources) {
       fs_reg *src = new fs_reg[MAX2(num_sources, 3)];
 
-      for (unsigned i = 0; i < MIN2(this->sources, num_sources); ++i)
+      for (int i = 0; i < MIN2(this->sources, num_sources); ++i)
         src[i] = this->src[i];
 
      delete[] this->src;
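[Editor's note] The first brw_fs.cpp hunk above clamps the width * stride product to at least one channel before rounding the byte count up to whole 32-byte registers, so a stride-0 (scalar-replicated) destination still reports one register written instead of zero. A small sketch of just that arithmetic, with stand-in names (REG_SIZE and the function are illustrative, not driver code):

// Sketch of the rounding in fs_inst::init() above.
#include <cstdio>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))
#define REG_SIZE 32 /* bytes per hardware register */

static unsigned regs_written(unsigned width, unsigned stride, unsigned type_sz)
{
   // Ceiling division by the register size; the MAX2 keeps a stride-0
   // destination from computing zero registers written.
   return (MAX2(width * stride, 1) * type_sz + REG_SIZE - 1) / REG_SIZE;
}

int main()
{
   std::printf("%u\n", regs_written(8, 1, 4));  // SIMD8 float: 32 bytes -> 1 reg
   std::printf("%u\n", regs_written(16, 1, 4)); // SIMD16 float: 64 bytes -> 2 regs
   std::printf("%u\n", regs_written(8, 0, 4));  // stride 0: still 1 reg, not 0
   return 0;
}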
@@ -237,236 +238,6 @@ fs_inst::resize_sources(uint8_t num_sources)
    }
 }
 
-#define ALU1(op)                                                        \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0)                \
-   {                                                                    \
-      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0);          \
-   }
-
-#define ALU2(op)                                                        \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0,                \
-                  const fs_reg &src1)                                   \
-   {                                                                    \
-      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);    \
-   }
-
-#define ALU2_ACC(op)                                                    \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0,                \
-                  const fs_reg &src1)                                   \
-   {                                                                    \
-      fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\
-      inst->writes_accumulator = true;                                  \
-      return inst;                                                      \
-   }
-
-#define ALU3(op)                                                        \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0,                \
-                  const fs_reg &src1, const fs_reg &src2)               \
-   {                                                                    \
-      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
-   }
-
-ALU1(NOT)
-ALU1(MOV)
-ALU1(FRC)
-ALU1(RNDD)
-ALU1(RNDE)
-ALU1(RNDZ)
-ALU2(ADD)
-ALU2(MUL)
-ALU2_ACC(MACH)
-ALU2(AND)
-ALU2(OR)
-ALU2(XOR)
-ALU2(SHL)
-ALU2(SHR)
-ALU2(ASR)
-ALU3(LRP)
-ALU1(BFREV)
-ALU3(BFE)
-ALU2(BFI1)
-ALU3(BFI2)
-ALU1(FBH)
-ALU1(FBL)
-ALU1(CBIT)
-ALU3(MAD)
-ALU2_ACC(ADDC)
-ALU2_ACC(SUBB)
-ALU2(SEL)
-ALU2(MAC)
-
-/** Gen4 predicated IF. */
-fs_inst *
-fs_visitor::IF(enum brw_predicate predicate)
-{
-   fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width);
-   inst->predicate = predicate;
-   return inst;
-}
-
-/** Gen6 IF with embedded comparison. */
-fs_inst *
-fs_visitor::IF(const fs_reg &src0, const fs_reg &src1,
-               enum brw_conditional_mod condition)
-{
-   assert(brw->gen == 6);
-   fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width,
-                                        reg_null_d, src0, src1);
-   inst->conditional_mod = condition;
-   return inst;
-}
-
-/**
- * CMP: Sets the low bit of the destination channels with the result
- * of the comparison, while the upper bits are undefined, and updates
- * the flag register with the packed 16 bits of the result.
- */
-fs_inst *
-fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1,
-                enum brw_conditional_mod condition)
-{
-   fs_inst *inst;
-
-   /* Take the instruction:
-    *
-    * CMP null<d> src0<f> src1<f>
-    *
-    * Original gen4 does type conversion to the destination type before
-    * comparison, producing garbage results for floating point comparisons.
-    * gen5 does the comparison on the execution type (resolved source types),
-    * so dst type doesn't matter.  gen6 does comparison and then uses the
-    * result as if it was the dst type with no conversion, which happens to
-    * mostly work out for float-interpreted-as-int since our comparisons are
-    * for >0, =0, <0.
-    */
-   if (brw->gen == 4) {
-      dst.type = src0.type;
-      if (dst.file == HW_REG)
-         dst.fixed_hw_reg.type = dst.type;
-   }
-
-   resolve_ud_negate(&src0);
-   resolve_ud_negate(&src1);
-
-   inst = new(mem_ctx) fs_inst(BRW_OPCODE_CMP, dst, src0, src1);
-   inst->conditional_mod = condition;
-
-   return inst;
-}
-
-fs_inst *
-fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources)
-{
-   uint8_t exec_size = dst.width;
-   for (int i = 0; i < sources; ++i) {
-      assert(src[i].width % dst.width == 0);
-      if (src[i].width > exec_size)
-         exec_size = src[i].width;
-   }
-
-   fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, exec_size,
-                                        dst, src, sources);
-   inst->regs_written = 0;
-   for (int i = 0; i < sources; ++i) {
-      /* The LOAD_PAYLOAD instruction only really makes sense if we are
-       * dealing with whole registers.  If this ever changes, we can deal
-       * with it later.
-       */
-      int size = src[i].effective_width * type_sz(src[i].type);
-      assert(size % 32 == 0);
-      inst->regs_written += (size + 31) / 32;
-   }
-
-   return inst;
-}
-
-exec_list
-fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
-                                       const fs_reg &surf_index,
-                                       const fs_reg &varying_offset,
-                                       uint32_t const_offset)
-{
-   exec_list instructions;
-   fs_inst *inst;
-
-   /* We have our constant surface use a pitch of 4 bytes, so our index can
-    * be any component of a vector, and then we load 4 contiguous
-    * components starting from that.
-    *
-    * We break down the const_offset to a portion added to the variable
-    * offset and a portion done using reg_offset, which means that if you
-    * have GLSL using something like "uniform vec4 a[20]; gl_FragColor =
-    * a[i]", we'll temporarily generate 4 vec4 loads from offset i * 4, and
-    * CSE can later notice that those loads are all the same and eliminate
-    * the redundant ones.
-    */
-   fs_reg vec4_offset = fs_reg(this, glsl_type::int_type);
-   instructions.push_tail(ADD(vec4_offset,
-                              varying_offset, fs_reg(const_offset & ~3)));
-
-   int scale = 1;
-   if (brw->gen == 4 && dst.width == 8) {
-      /* Pre-gen5, we can either use a SIMD8 message that requires (header,
-       * u, v, r) as parameters, or we can just use the SIMD16 message
-       * consisting of (header, u).  We choose the second, at the cost of a
-       * longer return length.
-       */
-      scale = 2;
-   }
-
-   enum opcode op;
-   if (brw->gen >= 7)
-      op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
-   else
-      op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
-
-   assert(dst.width % 8 == 0);
-   int regs_written = 4 * (dst.width / 8) * scale;
-   fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written),
-                               dst.type, dst.width);
-   inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset);
-   inst->regs_written = regs_written;
-   instructions.push_tail(inst);
-
-   if (brw->gen < 7) {
-      inst->base_mrf = 13;
-      inst->header_present = true;
-      if (brw->gen == 4)
-         inst->mlen = 3;
-      else
-         inst->mlen = 1 + dispatch_width / 8;
-   }
-
-   fs_reg result = offset(vec4_result, (const_offset & 3) * scale);
-   instructions.push_tail(MOV(dst, result));
-
-   return instructions;
-}
-
-/**
- * A helper for MOV generation for fixing up broken hardware SEND dependency
- * handling.
- */
-fs_inst *
-fs_visitor::DEP_RESOLVE_MOV(int grf)
-{
-   fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
-
-   inst->ir = NULL;
-   inst->annotation = "send dependency resolve";
-
-   /* The caller always wants uncompressed to emit the minimal extra
-    * dependencies, and to avoid having to deal with aligning its regs to 2.
-    */
-   inst->exec_size = 8;
-
-   return inst;
-}
-
 bool
 fs_inst::equals(fs_inst *inst) const
 {
@@ -632,186 +403,6 @@ fs_reg::is_contiguous() const
    return stride == 1;
 }
 
-bool
-fs_reg::is_valid_3src() const
-{
-   return file == GRF || file == UNIFORM;
-}
-
-int
-fs_visitor::type_size(const struct glsl_type *type)
-{
-   unsigned int size, i;
-
-   switch (type->base_type) {
-   case GLSL_TYPE_UINT:
-   case GLSL_TYPE_INT:
-   case GLSL_TYPE_FLOAT:
-   case GLSL_TYPE_BOOL:
-      return type->components();
-   case GLSL_TYPE_ARRAY:
-      return type_size(type->fields.array) * type->length;
-   case GLSL_TYPE_STRUCT:
-      size = 0;
-      for (i = 0; i < type->length; i++) {
-         size += type_size(type->fields.structure[i].type);
-      }
-      return size;
-   case GLSL_TYPE_SAMPLER:
-      /* Samplers take up no register space, since they're baked in at
-       * link time.
-       */
-      return 0;
-   case GLSL_TYPE_ATOMIC_UINT:
-      return 0;
-   case GLSL_TYPE_IMAGE:
-   case GLSL_TYPE_VOID:
-   case GLSL_TYPE_ERROR:
-   case GLSL_TYPE_INTERFACE:
-      unreachable("not reached");
-   }
-
-   return 0;
-}
-
-fs_reg
-fs_visitor::get_timestamp()
-{
-   assert(brw->gen >= 7);
-
-   fs_reg ts = fs_reg(retype(brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
-                                          BRW_ARF_TIMESTAMP,
-                                          0),
-                             BRW_REGISTER_TYPE_UD));
-
-   fs_reg dst = fs_reg(this, glsl_type::uint_type);
-
-   fs_inst *mov = emit(MOV(dst, ts));
-   /* We want to read the 3 fields we care about (mostly field 0, but also 2)
-    * even if it's not enabled in the dispatch.
-    */
-   mov->force_writemask_all = true;
-   mov->exec_size = 8;
-
-   /* The caller wants the low 32 bits of the timestamp.  Since it's running
-    * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
-    * which is plenty of time for our purposes.  It is identical across the
-    * EUs, but since it's tracking GPU core speed it will increment at a
-    * varying rate as render P-states change.
-    *
-    * The caller could also check if render P-states have changed (or anything
-    * else that might disrupt timing) by setting smear to 2 and checking if
-    * that field is != 0.
-    */
-   dst.set_smear(0);
-
-   return dst;
-}
-
-void
-fs_visitor::emit_shader_time_begin()
-{
-   current_annotation = "shader time start";
-   shader_start_time = get_timestamp();
-}
-
-void
-fs_visitor::emit_shader_time_end()
-{
-   current_annotation = "shader time end";
-
-   enum shader_time_shader_type type, written_type, reset_type;
-   if (dispatch_width == 8) {
-      type = ST_FS8;
-      written_type = ST_FS8_WRITTEN;
-      reset_type = ST_FS8_RESET;
-   } else {
-      assert(dispatch_width == 16);
-      type = ST_FS16;
-      written_type = ST_FS16_WRITTEN;
-      reset_type = ST_FS16_RESET;
-   }
-
-   fs_reg shader_end_time = get_timestamp();
-
-   /* Check that there weren't any timestamp reset events (assuming these
-    * were the only two timestamp reads that happened).
-    */
-   fs_reg reset = shader_end_time;
-   reset.set_smear(2);
-   fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u)));
-   test->conditional_mod = BRW_CONDITIONAL_Z;
-   emit(IF(BRW_PREDICATE_NORMAL));
-
-   push_force_uncompressed();
-   fs_reg start = shader_start_time;
-   start.negate = true;
-   fs_reg diff = fs_reg(this, glsl_type::uint_type);
-   emit(ADD(diff, start, shader_end_time));
-
-   /* If there were no instructions between the two timestamp gets, the diff
-    * is 2 cycles.  Remove that overhead, so I can forget about that when
-    * trying to determine the time taken for single instructions.
-    */
-   emit(ADD(diff, diff, fs_reg(-2u)));
-
-   emit_shader_time_write(type, diff);
-   emit_shader_time_write(written_type, fs_reg(1u));
-   emit(BRW_OPCODE_ELSE);
-   emit_shader_time_write(reset_type, fs_reg(1u));
-   emit(BRW_OPCODE_ENDIF);
-
-   pop_force_uncompressed();
-}
-
-void
-fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
-                                   fs_reg value)
-{
-   int shader_time_index =
-      brw_get_shader_time_index(brw, shader_prog, prog, type);
-   fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
-
-   fs_reg payload;
-   if (dispatch_width == 8)
-      payload = fs_reg(this, glsl_type::uvec2_type);
-   else
-      payload = fs_reg(this, glsl_type::uint_type);
-
-   emit(new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
-                             fs_reg(), payload, offset, value));
-}
-
-void
-fs_visitor::vfail(const char *format, va_list va)
-{
-   char *msg;
-
-   if (failed)
-      return;
-
-   failed = true;
-
-   msg = ralloc_vasprintf(mem_ctx, format, va);
-   msg = ralloc_asprintf(mem_ctx, "FS compile failed: %s\n", msg);
-
-   this->fail_msg = msg;
-
-   if (INTEL_DEBUG & DEBUG_WM) {
-      fprintf(stderr, "%s", msg);
-   }
-}
-
-void
-fs_visitor::fail(const char *format, ...)
-{
-   va_list va;
-
-   va_start(va, format);
-   vfail(format, va);
-   va_end(va);
-}
-
 /**
  * Mark this program as impossible to compile in SIMD16 mode.
  *
@@ -844,58 +435,6 @@ fs_visitor::no16(const char *format, ...)
    va_end(va);
 }
 
-fs_inst *
-fs_visitor::emit(enum opcode opcode)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dispatch_width));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src0));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-                 const fs_reg &src1)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-                 const fs_reg &src1, const fs_reg &src2)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1, src2));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst,
-                 fs_reg src[], int sources)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src, sources));
-}
-
-void
-fs_visitor::push_force_uncompressed()
-{
-   force_uncompressed_stack++;
-}
-
-void
-fs_visitor::pop_force_uncompressed()
-{
-   force_uncompressed_stack--;
-   assert(force_uncompressed_stack >= 0);
-}
-
 /**
  * Returns true if the instruction has a flag that means it won't
  * update an entire destination register.
@@ -958,67 +497,6 @@ fs_inst::writes_flag() const
           opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS;
 }
 
-/**
- * Returns how many MRFs an FS opcode will write over.
- *
- * Note that this is not the 0 or 1 implied writes in an actual gen
- * instruction -- the FS opcodes often generate MOVs in addition.
- */
-int
-fs_visitor::implied_mrf_writes(fs_inst *inst)
-{
-   if (inst->mlen == 0)
-      return 0;
-
-   if (inst->base_mrf == -1)
-      return 0;
-
-   switch (inst->opcode) {
-   case SHADER_OPCODE_RCP:
-   case SHADER_OPCODE_RSQ:
-   case SHADER_OPCODE_SQRT:
-   case SHADER_OPCODE_EXP2:
-   case SHADER_OPCODE_LOG2:
-   case SHADER_OPCODE_SIN:
-   case SHADER_OPCODE_COS:
-      return 1 * dispatch_width / 8;
-   case SHADER_OPCODE_POW:
-   case SHADER_OPCODE_INT_QUOTIENT:
-   case SHADER_OPCODE_INT_REMAINDER:
-      return 2 * dispatch_width / 8;
-   case SHADER_OPCODE_TEX:
-   case FS_OPCODE_TXB:
-   case SHADER_OPCODE_TXD:
-   case SHADER_OPCODE_TXF:
-   case SHADER_OPCODE_TXF_CMS:
-   case SHADER_OPCODE_TXF_MCS:
-   case SHADER_OPCODE_TG4:
-   case SHADER_OPCODE_TG4_OFFSET:
-   case SHADER_OPCODE_TXL:
-   case SHADER_OPCODE_TXS:
-   case SHADER_OPCODE_LOD:
-      return 1;
-   case FS_OPCODE_FB_WRITE:
-      return 2;
-   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
-   case SHADER_OPCODE_GEN4_SCRATCH_READ:
-      return 1;
-   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
-      return inst->mlen;
-   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
-      return 2;
-   case SHADER_OPCODE_UNTYPED_ATOMIC:
-   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
-   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
-   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
-   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
-   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
-      return 0;
-   default:
-      unreachable("not reached");
-   }
-}
-
 /** Fixed HW reg constructor. */
 fs_reg::fs_reg(enum register_file file, int reg)
 {
@@ -1078,12 +556,6 @@ fs_reg::fs_reg(fs_visitor *v, const struct glsl_type *type)
    assert(this->width == 8 || this->width == 16);
 }
 
-fs_reg *
-fs_visitor::variable_storage(ir_variable *var)
-{
-   return (fs_reg *)hash_table_find(this->variable_ht, var);
-}
-
 void
 import_uniforms_callback(const void *key,
                          void *data,
@@ -1110,82 +582,102 @@ fs_visitor::import_uniforms(fs_visitor *v)
    this->push_constant_loc = v->push_constant_loc;
    this->pull_constant_loc = v->pull_constant_loc;
    this->uniforms = v->uniforms;
-   this->param_size = v->param_size;
+   this->uniform_size = v->uniform_size;
 }
 
-/* Our support for uniforms is piggy-backed on the struct
- * gl_fragment_program, because that's where the values actually
- * get stored, rather than in some global gl_shader_program uniform
- * store.
+/**
+ * A helper for MOV generation for fixing up broken hardware SEND dependency
+ * handling.
  */
-void
-fs_visitor::setup_uniform_values(ir_variable *ir)
+fs_inst *
+fs_visitor::DEP_RESOLVE_MOV(int grf)
 {
-   int namelen = strlen(ir->name);
+   fs_inst *inst = bld.MOV(brw_null_reg(),
+                           fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
 
-   /* The data for our (non-builtin) uniforms is stored in a series of
-    * gl_uniform_driver_storage structs for each subcomponent that
-    * glGetUniformLocation() could name.  We know it's been set up in the same
-    * order we'd walk the type, so walk the list of storage and find anything
-    * with our name, or the prefix of a component that starts with our name.
+   inst->ir = NULL;
+   inst->annotation = "send dependency resolve";
+
+   /* The caller always wants uncompressed to emit the minimal extra
+    * dependencies, and to avoid having to deal with aligning its regs to 2.
    */
-   unsigned params_before = uniforms;
-   for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
-      struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
-      if (strncmp(ir->name, storage->name, namelen) != 0 ||
-          (storage->name[namelen] != 0 &&
-           storage->name[namelen] != '.' &&
-           storage->name[namelen] != '[')) {
-         continue;
-      }
+   inst->exec_size = 8;
 
-      unsigned slots = storage->type->component_slots();
-      if (storage->array_elements)
-         slots *= storage->array_elements;
+   return inst;
+}
 
-      for (unsigned i = 0; i < slots; i++) {
-         stage_prog_data->param[uniforms++] = &storage->storage[i];
+void
+fs_visitor::emit_pull_constant_load(brw::fs_builder &bld,
+                                    const fs_reg &dst,
+                                    const fs_reg &surf_index,
+                                    uint32_t off,
+                                    const fs_reg *reladdr,
+                                    unsigned num_components)
+{
+   if (reladdr) {
+      /* We have our constant surface use a pitch of 4 bytes, so our index can
+       * be any component of a vector, and then we load 4 contiguous
+       * components starting from that.
+       */
+      fs_reg addr = bld.scalar_reg(BRW_REGISTER_TYPE_D);
+      bld.ADD(fs_reg(addr), *reladdr, fs_reg((off / 4) & ~3));
+
+      int scale = 1;
+      if (brw->gen == 4 && dst.width == 8) {
+         /* Pre-gen5, we can either use a SIMD8 message that requires (header,
+          * u, v, r) as parameters, or we can just use the SIMD16 message
+          * consisting of (header, u).  We choose the second, at the cost of a
+          * longer return length.
+          */
+         scale = 2;
       }
-   }
 
-   /* Make sure we actually initialized the right amount of stuff here.
-    */
-   assert(params_before + ir->type->component_slots() == uniforms);
-   (void)params_before;
-}
+      enum opcode op;
+      if (brw->gen >= 7)
+         op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
+      else
+         op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
 
+      assert(dst.width % 8 == 0);
+      int regs_written = 4 * (dst.width / 8) * scale;
+      fs_reg result = bld.scalar_reg(dst.type, regs_written);
+      instruction *inst = bld.emit(op, result, surf_index, addr);
 
-/* Our support for builtin uniforms is even scarier than non-builtin.
- * It sits on top of the PROG_STATE_VAR parameters that are
- * automatically updated from GL context state.
- */
-void
-fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
-{
-   const ir_state_slot *const slots = ir->get_state_slots();
-   assert(slots != NULL);
+      inst->regs_written = regs_written;
 
-   for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
-      /* This state reference has already been setup by ir_to_mesa, but we'll
-       * get the same index back here.
-       */
-      int index = _mesa_add_state_reference(this->prog->Parameters,
-                                            (gl_state_index *)slots[i].tokens);
+      if (brw->gen < 7) {
+         inst->base_mrf = 13;
+         inst->header_present = true;
+         if (brw->gen == 4)
+            inst->mlen = 3;
+         else
+            inst->mlen = 1 + dispatch_width / 8;
+      }
 
-      /* Add each of the unique swizzles of the element as a parameter.
-       * This'll end up matching the expected layout of the
-       * array/matrix/structure we're trying to fill in.
-       */
-      int last_swiz = -1;
-      for (unsigned int j = 0; j < 4; j++) {
-         int swiz = GET_SWZ(slots[i].swizzle, j);
-         if (swiz == last_swiz)
-            break;
-         last_swiz = swiz;
+      for (unsigned i = 0; i < num_components; ++i)
+         bld.MOV(offset(dst, i), offset(fs_reg(result),
+                                        (((off / 4) & 3) + i) * scale));
 
-         stage_prog_data->param[uniforms++] =
-            &prog->Parameters->ParameterValues[index][swiz];
+   } else {
+      brw::fs_builder ubld = bld.force_uncompressed();
+      fs_reg result = bld.scalar_reg(dst.type);
+      fs_reg addr;
+
+      if (brw->gen >= 8) {
+         /* Store the offset in a GRF so we can send-from-GRF. */
+         addr = bld.scalar_reg(BRW_REGISTER_TYPE_D);
+         ubld.MOV(fs_reg(addr), fs_reg(off & ~15));
+      } else {
+         /* Immediates are fine on older generations since they'll be moved
+          * to a (potentially fake) MRF at the generator level.
+          */
+         addr = fs_reg(off & ~15);
       }
+
+      ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, result, surf_index, addr);
+
+      for (unsigned i = 0; i < num_components; ++i)
+         bld.MOV(offset(dst, i), component(result, ((off / 4) & 3) + i));
    }
 }
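[Editor's note] The new emit_pull_constant_load() above splits a byte offset into a vec4-aligned base that goes into the load message and a component index used to pick the value out of the returned payload. A standalone sketch of just that arithmetic (the buffer layout of 4 bytes per component and 4 components per message is taken from the comments in the function; everything else is illustrative):

// Sketch of the offset arithmetic in emit_pull_constant_load().
#include <cstdio>

int main()
{
   unsigned off = 28;               // byte offset of the constant
   unsigned comp = off / 4;         // component index: 7
   unsigned vec4_base = comp & ~3u; // first component of the enclosing vec4: 4
   unsigned subcomp = comp & 3u;    // component within that vec4: 3

   std::printf("load vec4 at component %u, use component %u\n",
               vec4_base, subcomp);
   return 0;
}

The same masking, done in units of 16 bytes (off & ~15), produces the aligned address used by the non-indirect branch of the function.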
@@ -1200,15 +692,15 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
 
    /* gl_FragCoord.x */
    if (ir->data.pixel_center_integer) {
-      emit(MOV(wpos, this->pixel_x));
+      bld.MOV(wpos, this->pixel_x);
    } else {
-      emit(ADD(wpos, this->pixel_x, fs_reg(0.5f)));
+      bld.ADD(wpos, this->pixel_x, fs_reg(0.5f));
    }
    wpos = offset(wpos, 1);
 
    /* gl_FragCoord.y */
    if (!flip && ir->data.pixel_center_integer) {
-      emit(MOV(wpos, this->pixel_y));
+      bld.MOV(wpos, this->pixel_y);
    } else {
       fs_reg pixel_y = this->pixel_y;
       float offset = (ir->data.pixel_center_integer ? 0.0 : 0.5);
@@ -1218,15 +710,15 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
          offset += key->drawable_height - 1.0;
       }
 
-      emit(ADD(wpos, pixel_y, fs_reg(offset)));
+      bld.ADD(wpos, pixel_y, fs_reg(offset));
    }
    wpos = offset(wpos, 1);
 
    /* gl_FragCoord.z */
    if (brw->gen >= 6) {
-      emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
+      bld.MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)));
    } else {
-      emit(FS_OPCODE_LINTERP, wpos,
+      bld.emit(FS_OPCODE_LINTERP, wpos,
            this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
            this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
            interp_reg(VARYING_SLOT_POS, 2));
@@ -1234,7 +726,7 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
    wpos = offset(wpos, 1);
 
    /* gl_FragCoord.w: Already set up in emit_interpolation */
-   emit(BRW_OPCODE_MOV, wpos, this->wpos_w);
+   bld.emit(BRW_OPCODE_MOV, wpos, this->wpos_w);
 
    return reg;
 }
@@ -1269,7 +761,7 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
        */
       barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
    }
-   return emit(FS_OPCODE_LINTERP, attr,
+   return bld.emit(FS_OPCODE_LINTERP, attr,
                this->delta_x[barycoord_mode],
                this->delta_y[barycoord_mode], interp);
 }
@@ -1323,7 +815,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
                struct brw_reg interp = interp_reg(location, k);
                interp = suboffset(interp, 3);
                interp.type = reg->type;
-               emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
+               bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
                attr = offset(attr, 1);
             }
          } else {
@@ -1336,7 +828,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
                 * unlit, replace the centroid data with non-centroid
                 * data.
                 */
-               emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+               bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
 
                fs_inst *inst;
                inst = emit_linterp(attr, fs_reg(interp), interpolation_mode,
@@ -1360,7 +852,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
                              ir->data.sample || key->persample_shading);
             }
             if (brw->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
-               emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
+               bld.emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
             }
             attr = offset(attr, 1);
          }
@@ -1393,7 +885,7 @@ fs_visitor::emit_frontfacing_interpolation()
       fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
       g0.negate = true;
 
-      emit(ASR(*reg, g0, fs_reg(15)));
+      bld.ASR(*reg, g0, fs_reg(15));
    } else {
       /* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create
       * a boolean result from this (1/true or 0/false).
@@ -1410,8 +902,8 @@ fs_visitor::emit_frontfacing_interpolation()
      fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
      g1_6.negate = true;
 
-      emit(ASR(asr, g1_6, fs_reg(31)));
-      emit(AND(*reg, asr, fs_reg(1)));
+      bld.ASR(asr, g1_6, fs_reg(31));
+      bld.AND(*reg, asr, fs_reg(1));
    }
 
    return reg;
@@ -1426,9 +918,9 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
 
    if (key->compute_pos_offset) {
       /* Convert int_sample_pos to floating point */
-      emit(MOV(dst, int_sample_pos));
+      bld.MOV(dst, int_sample_pos);
       /* Scale to the range [0, 1] */
-      emit(MUL(dst, dst, fs_reg(1 / 16.0f)));
+      bld.MUL(dst, dst, fs_reg(1 / 16.0f));
    }
    else {
      /* From ARB_sample_shading specification:
      * (When rendering to a non-multisample buffer, or if multisample
      *  rasterization is disabled, gl_SamplePosition will always be
      *  (0.5, 0.5).
      */
-      emit(MOV(dst, fs_reg(0.5f)));
+      bld.MOV(dst, fs_reg(0.5f));
   }
 }
 
@@ -1445,7 +937,7 @@ fs_visitor::emit_samplepos_setup()
 {
    assert(brw->gen >= 6);
 
-   this->current_annotation = "compute sample position";
+   bld.set_annotation("compute sample position");
    fs_reg *reg = new(this->mem_ctx) fs_reg(this, glsl_type::vec2_type);
    fs_reg pos = *reg;
    fs_reg int_sample_x = fs_reg(this, glsl_type::int_type);
@@ -1467,21 +959,21 @@ fs_visitor::emit_samplepos_setup()
                     BRW_REGISTER_TYPE_B), 16, 8, 2);
 
    if (dispatch_width == 8) {
-      emit(MOV(int_sample_x, fs_reg(sample_pos_reg)));
+      bld.MOV(int_sample_x, fs_reg(sample_pos_reg));
    } else {
-      emit(MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg)));
-      emit(MOV(half(int_sample_x, 1), fs_reg(suboffset(sample_pos_reg, 16))))
+      bld.MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg));
+      bld.MOV(half(int_sample_x, 1), fs_reg(suboffset(sample_pos_reg, 16)))
          ->force_sechalf = true;
    }
    /* Compute gl_SamplePosition.x */
    compute_sample_position(pos, int_sample_x);
    pos = offset(pos, 1);
    if (dispatch_width == 8) {
-      emit(MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1))));
+      bld.MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1)));
    } else {
-      emit(MOV(half(int_sample_y, 0),
-               fs_reg(suboffset(sample_pos_reg, 1))));
-      emit(MOV(half(int_sample_y, 1), fs_reg(suboffset(sample_pos_reg, 17))))
+      bld.MOV(half(int_sample_y, 0),
+              fs_reg(suboffset(sample_pos_reg, 1)));
+      bld.MOV(half(int_sample_y, 1), fs_reg(suboffset(sample_pos_reg, 17)))
         ->force_sechalf = true;
    }
    /* Compute gl_SamplePosition.y */
@@ -1496,7 +988,7 @@ fs_visitor::emit_sampleid_setup(ir_variable *ir)
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
    assert(brw->gen >= 6);
 
-   this->current_annotation = "compute sample id";
+   bld.set_annotation("compute sample id");
    fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
 
    if (key->compute_sample_id) {
@@ -1524,130 +1016,30 @@ fs_visitor::emit_sampleid_setup(ir_variable *ir)
        * subspan 1, and finally sample 1 of subspan 1.
        */
       fs_inst *inst;
-      inst = emit(BRW_OPCODE_AND, t1,
+      inst = bld.emit(BRW_OPCODE_AND, t1,
                   fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
                   fs_reg(0xc0));
       inst->force_writemask_all = true;
-      inst = emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
+      inst = bld.emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
       inst->force_writemask_all = true;
       /* This works for both SIMD8 and SIMD16 */
-      inst = emit(MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210)));
+      inst = bld.MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210));
      inst->force_writemask_all = true;
      /* This special instruction takes care of setting vstride=1,
       * width=4, hstride=0 of t2 during an ADD instruction.
       */
-      emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2);
+      bld.emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2);
    } else {
      /* As per GL_ARB_sample_shading specification:
      * "When rendering to a non-multisample buffer, or if multisample
      *  rasterization is disabled, gl_SampleID will always be zero."
      */
-      emit(BRW_OPCODE_MOV, *reg, fs_reg(0));
+      bld.emit(BRW_OPCODE_MOV, *reg, fs_reg(0));
    }
 
    return reg;
 }
 
-fs_reg
-fs_visitor::fix_math_operand(fs_reg src)
-{
-   /* Can't do hstride == 0 args on gen6 math, so expand it out. We
-    * might be able to do better by doing execsize = 1 math and then
-    * expanding that result out, but we would need to be careful with
-    * masking.
-    *
-    * The hardware ignores source modifiers (negate and abs) on math
-    * instructions, so we also move to a temp to set those up.
-    */
-   if (brw->gen == 6 && src.file != UNIFORM && src.file != IMM &&
-       !src.abs && !src.negate)
-      return src;
-
-   /* Gen7 relaxes most of the above restrictions, but still can't use IMM
-    * operands to math
-    */
-   if (brw->gen >= 7 && src.file != IMM)
-      return src;
-
-   fs_reg expanded = fs_reg(this, glsl_type::float_type);
-   expanded.type = src.type;
-   emit(BRW_OPCODE_MOV, expanded, src);
-   return expanded;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
-{
-   switch (opcode) {
-   case SHADER_OPCODE_RCP:
-   case SHADER_OPCODE_RSQ:
-   case SHADER_OPCODE_SQRT:
-   case SHADER_OPCODE_EXP2:
-   case SHADER_OPCODE_LOG2:
-   case SHADER_OPCODE_SIN:
-   case SHADER_OPCODE_COS:
-      break;
-   default:
-      unreachable("not reached: bad math opcode");
-   }
-
-   /* Can't do hstride == 0 args to gen6 math, so expand it out.  We
-    * might be able to do better by doing execsize = 1 math and then
-    * expanding that result out, but we would need to be careful with
-    * masking.
-    *
-    * Gen 6 hardware ignores source modifiers (negate and abs) on math
-    * instructions, so we also move to a temp to set those up.
-    */
-   if (brw->gen == 6 || brw->gen == 7)
-      src = fix_math_operand(src);
-
-   fs_inst *inst = emit(opcode, dst, src);
-
-   if (brw->gen < 6) {
-      inst->base_mrf = 2;
-      inst->mlen = dispatch_width / 8;
-   }
-
-   return inst;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
-{
-   int base_mrf = 2;
-   fs_inst *inst;
-
-   if (brw->gen >= 8) {
-      inst = emit(opcode, dst, src0, src1);
-   } else if (brw->gen >= 6) {
-      src0 = fix_math_operand(src0);
-      src1 = fix_math_operand(src1);
-
-      inst = emit(opcode, dst, src0, src1);
-   } else {
-      /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
-       * "Message Payload":
-       *
-       * "Operand0[7].  For the INT DIV functions, this operand is the
-       *  denominator."
-       *  ...
-       * "Operand1[7].  For the INT DIV functions, this operand is the
-       *  numerator."
-       */
-      bool is_int_div = opcode != SHADER_OPCODE_POW;
-      fs_reg &op0 = is_int_div ? src1 : src0;
-      fs_reg &op1 = is_int_div ? src0 : src1;
-
-      emit(MOV(fs_reg(MRF, base_mrf + 1, op1.type, dispatch_width), op1));
-      inst = emit(opcode, dst, op0, reg_null_f);
-
-      inst->base_mrf = base_mrf;
-      inst->mlen = 2 * dispatch_width / 8;
-   }
-   return inst;
-}
-
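[Editor's note] The deleted fix_math_operand() above encodes per-generation legality rules for math-instruction operands: gen6 math cannot take UNIFORM or IMM sources or source modifiers, gen7 still cannot take immediates, and the remedy in either case is a copy through a temporary. A distilled sketch of just the decision, with simplified stand-in types (the gen6/gen7 conditions are taken from the deleted code; the pre-gen6 case is simplified since emit_math() only legalized operands on gen6/7):

// Sketch of the operand-legalization decision in the deleted fix_math_operand().
struct operand { int file; bool abs, negate; };
enum { FILE_GRF, FILE_UNIFORM, FILE_IMM };

static bool math_operand_needs_copy(int gen, operand src)
{
   if (gen == 6)
      return src.file == FILE_UNIFORM || src.file == FILE_IMM ||
             src.abs || src.negate;
   if (gen >= 7)
      return src.file == FILE_IMM; // gen7 relaxes everything but immediates
   return false; // pre-gen6 math goes through MRFs and wasn't legalized here
}

In the unified-visitor world this logic moves behind the builder's emit_math(), which is why the visitors below can drop their private copies.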
 void
 fs_visitor::assign_curb_setup()
 {
@@ -2069,9 +1461,9 @@ fs_visitor::move_uniform_array_access_to_pull_constants()
          if (pull_constant_loc[uniform] == -1) {
             const gl_constant_value **values = &stage_prog_data->param[uniform];
 
-            assert(param_size[uniform]);
+            assert(uniform_size[uniform]);
 
-            for (int j = 0; j < param_size[uniform]; j++) {
+            for (int j = 0; j < uniform_size[uniform]; j++) {
                pull_constant_loc[uniform + j] = stage_prog_data->nr_pull_params;
 
               stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] =
@@ -2187,34 +1579,23 @@ fs_visitor::demote_pull_constants()
            continue;
 
         /* Set up the annotation tracking for new generated instructions. */
-         base_ir = inst->ir;
-         current_annotation = inst->annotation;
+         bld.set_base_ir(inst->ir);
+         bld.set_annotation(inst->annotation);
+         brw::fs_builder ibld = bld.at(block, inst);
 
         fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
-         fs_reg dst = fs_reg(this, glsl_type::float_type);
+         fs_reg dst = ibld.scalar_reg(BRW_REGISTER_TYPE_F);
 
         /* Generate a pull load into dst. */
-         if (inst->src[i].reladdr) {
-            exec_list list = VARYING_PULL_CONSTANT_LOAD(dst,
-                                                        surf_index,
-                                                        *inst->src[i].reladdr,
-                                                        pull_index);
-            inst->insert_before(block, &list);
-            inst->src[i].reladdr = NULL;
-         } else {
-            fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
-            fs_inst *pull =
-               new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
-                                    dst, surf_index, offset);
-            inst->insert_before(block, pull);
-            inst->src[i].set_smear(pull_index & 3);
-         }
+         emit_pull_constant_load(ibld, dst, surf_index, pull_index * 4,
+                                 inst->src[i].reladdr, 1);
 
         /* Rewrite the instruction to use the temporary VGRF. */
         inst->src[i].file = GRF;
         inst->src[i].reg = dst.reg;
         inst->src[i].reg_offset = 0;
         inst->src[i].width = dispatch_width;
+         inst->src[i].reladdr = NULL;
      }
   }
   invalidate_live_intervals();
@@ -2573,13 +1954,13 @@ fs_visitor::emit_repclear_shader()
    int base_mrf = 1;
    int color_mrf = base_mrf + 2;
 
-   fs_inst *mov = emit(MOV(vec4(brw_message_reg(color_mrf)),
-                           fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)));
+   fs_inst *mov = bld.MOV(vec4(brw_message_reg(color_mrf)),
+                          fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
    mov->force_writemask_all = true;
 
-   fs_inst *write;
+   fs_inst *write = NULL;
    if (key->nr_color_regions == 1) {
-      write = emit(FS_OPCODE_REP_FB_WRITE);
+      write = bld.emit(FS_OPCODE_REP_FB_WRITE);
       write->saturate = key->clamp_fragment_color;
       write->base_mrf = color_mrf;
       write->target = 0;
@@ -2587,7 +1968,7 @@ fs_visitor::emit_repclear_shader()
       write->mlen = 1;
    } else {
       for (int i = 0; i < key->nr_color_regions; ++i) {
-         write = emit(FS_OPCODE_REP_FB_WRITE);
+         write = bld.emit(FS_OPCODE_REP_FB_WRITE);
         write->saturate = key->clamp_fragment_color;
         write->base_mrf = base_mrf;
         write->target = i;
@@ -2597,6 +1978,7 @@ fs_visitor::emit_repclear_shader()
    }
    write->eot = true;
 
+   bld = bld.at(NULL, NULL);
    calculate_cfg();
    assign_constant_locations();
@@ -2983,6 +2365,7 @@ fs_visitor::lower_load_payload()
       if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
          assert(inst->dst.file == MRF || inst->dst.file == GRF);
 
+         brw::fs_builder ibld = bld.at(block, inst);
         fs_reg dst = inst->dst;
 
         for (int i = 0; i < inst->sources; i++) {
@@ -3001,13 +2384,11 @@ fs_visitor::lower_load_payload()
                  compr4_dst.width = 16;
                  fs_reg compr4_src = inst->src[i];
                  compr4_src.width = 16;
-                  fs_inst *mov = MOV(compr4_dst, compr4_src);
-                  mov->force_writemask_all = true;
-                  inst->insert_before(block, mov);
+                  brw::exec_all(ibld.MOV(compr4_dst, compr4_src));
                  /* Mark i+4 as BAD_FILE so we don't emit a MOV for it */
                  inst->src[i + 4].file = BAD_FILE;
               } else {
-                  fs_inst *mov = MOV(dst, inst->src[i]);
+                  fs_inst *mov = ibld.MOV(dst, inst->src[i]);
                  if (inst->src[i].file == GRF) {
                     int src_reg = vgrf_to_reg[inst->src[i].reg] +
                                   inst->src[i].reg_offset;
@@ -3029,7 +2410,6 @@ fs_visitor::lower_load_payload()
                        metadata[dst_reg + 1].force_sechalf = true;
                     }
                  }
-                  inst->insert_before(block, mov);
               }
 
               dst = offset(dst, 1);
@@ -3267,34 +2647,6 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
    fprintf(file, "\n");
 }
 
-/**
- * Possibly returns an instruction that set up @param reg.
- *
- * Sometimes we want to take the result of some expression/variable
- * dereference tree and rewrite the instruction generating the result
- * of the tree.  When processing the tree, we know that the
- * instructions generated are all writing temporaries that are dead
- * outside of this tree.  So, if we have some instructions that write
- * a temporary, we're free to point that temp write somewhere else.
- *
- * Note that this doesn't guarantee that the instruction generated
- * only reg -- it might be the size=4 destination of a texture instruction.
- */
-fs_inst *
-fs_visitor::get_instruction_generating_reg(fs_inst *start,
-                                           fs_inst *end,
-                                           const fs_reg &reg)
-{
-   if (end == start ||
-       end->is_partial_write() ||
-       reg.reladdr ||
-       !reg.equals(end->dst)) {
-      return NULL;
-   } else {
-      return end;
-   }
-}
-
 void
 fs_visitor::setup_payload_gen6()
 {
@@ -3480,7 +2832,7 @@ fs_visitor::run()
       (stage == MESA_SHADER_FRAGMENT) &&
       ((brw_wm_prog_key*) this->key)->alpha_test_func;
    if (uses_kill || alpha_test_func) {
-      fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+      fs_inst *discard_init = bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
      discard_init->flag_subreg = 1;
   }
 
@@ -3489,24 +2841,25 @@ fs_visitor::run()
    */
   if (shader) {
      foreach_in_list(ir_instruction, ir, shader->base.ir) {
-         base_ir = ir;
+         bld.set_base_ir(ir);
         this->result = reg_undef;
         ir->accept(this);
      }
   } else {
      emit_fragment_program_code();
   }
-   base_ir = NULL;
+   bld.set_base_ir(NULL);
   if (failed)
      return false;
 
-   emit(FS_OPCODE_PLACEHOLDER_HALT);
+   bld.emit(FS_OPCODE_PLACEHOLDER_HALT);
 
   if (alpha_test_func)
      emit_alpha_test();
 
   emit_fb_writes();
 
+   bld = bld.at(NULL, NULL);
   calculate_cfg();
 
   split_virtual_grfs();
@@ -3526,7 +2879,7 @@ fs_visitor::run()
             snprintf(filename, 64, "fs%d-%04d-%02d-%02d-" #pass,              \
                      dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
                                                                               \
-            backend_visitor::dump_instructions(filename);                     \
+            brw::base_visitor::dump_instructions(filename);                   \
         }                                                                     \
                                                                               \
         progress = progress || this_progress;                                \
@@ -3537,7 +2890,7 @@ fs_visitor::run()
 
         snprintf(filename, 64, "fs%d-%04d-00-start",
                  dispatch_width, shader_prog ? shader_prog->Name : 0);
 
-         backend_visitor::dump_instructions(filename);
+         brw::base_visitor::dump_instructions(filename);
      }
 
     bool progress;
@@ -3622,7 +2975,6 @@ fs_visitor::run()
         }
      }
   }
-   assert(force_uncompressed_stack == 0);
 
   /* This must come after all optimization and register allocation, since
    * it inserts dead code that happens to have side effects, and it does
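[Editor's note] Throughout the brw_fs.cpp changes above, bare emit()/MOV()/insert_before() calls are replaced by calls on a builder object: bld.MOV(...), bld.at(block, inst) to retarget the insertion point, bld.set_annotation()/set_base_ir() for debug state, and brw::exec_all(...) to force a full writemask. A toy sketch of that idiom, with made-up types that only mirror the shape of the real brw::fs_builder (none of these names are the driver's real interfaces):

// Toy sketch of the builder idiom: the builder carries the insertion point
// and debug annotations, so passes can splice instructions anywhere.
#include <list>
#include <string>

struct inst { std::string op; const char *annotation = nullptr; };

class builder {
public:
   explicit builder(std::list<inst> *l) : insts_(l), pos_(l->end()) {}

   // A copy of this builder that inserts before 'where'.
   builder at(std::list<inst>::iterator where) const {
      builder b = *this;
      b.pos_ = where;
      return b;
   }

   void set_annotation(const char *a) { annotation_ = a; }

   inst *emit(const std::string &op) {
      inst i;
      i.op = op;
      i.annotation = annotation_;
      // std::list::insert places the element before pos_ (appends at end()).
      return &*insts_->insert(pos_, i);
   }

   inst *MOV(const std::string &dst, const std::string &src) {
      return emit("mov " + dst + ", " + src);
   }

private:
   std::list<inst> *insts_;
   std::list<inst>::iterator pos_;
   const char *annotation_ = nullptr;
};

Because the builder is a small value type, an optimization pass can make a local copy pointed at an arbitrary instruction (the ibld = bld.at(block, inst) pattern above) without disturbing the visitor's main emission cursor.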
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index f38db3b8abb..3982838dd51 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -28,7 +28,7 @@
 #pragma once
 
 #include "brw_shader.h"
-#include "brw_ir_fs.h"
+#include "brw_ir_visitor.h"
 
 extern "C" {
 
@@ -42,7 +42,6 @@ extern "C" {
 #include "program/prog_optimize.h"
 #include "util/register_allocate.h"
 #include "program/sampler.h"
-#include "program/hash_table.h"
 #include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_wm.h"
@@ -81,13 +80,9 @@ public:
  *
  * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
  */
-class fs_visitor : public backend_visitor
+class fs_visitor : public brw::backend_visitor<fs_visitor, brw::fs_builder>
 {
 public:
-   const fs_reg reg_null_f;
-   const fs_reg reg_null_d;
-   const fs_reg reg_null_ud;
-
    fs_visitor(struct brw_context *brw,
              void *mem_ctx,
              const struct brw_wm_prog_key *key,
@@ -95,98 +90,23 @@ public:
              struct gl_shader_program *shader_prog,
              struct gl_fragment_program *fp,
              unsigned dispatch_width);
-   ~fs_visitor();
 
    void init();
 
-   fs_reg *variable_storage(ir_variable *var);
-
    void import_uniforms(fs_visitor *v);
 
    void visit(ir_variable *ir);
-   void visit(ir_assignment *ir);
-   void visit(ir_dereference_variable *ir);
-   void visit(ir_dereference_record *ir);
-   void visit(ir_dereference_array *ir);
-   void visit(ir_expression *ir);
-   void visit(ir_texture *ir);
-   void visit(ir_if *ir);
-   void visit(ir_constant *ir);
-   void visit(ir_swizzle *ir);
-   void visit(ir_return *ir);
-   void visit(ir_loop *ir);
-   void visit(ir_loop_jump *ir);
    void visit(ir_discard *ir);
-   void visit(ir_call *ir);
-   void visit(ir_function *ir);
-   void visit(ir_function_signature *ir);
    void visit(ir_emit_vertex *);
    void visit(ir_end_primitive *);
 
-   uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
-   void swizzle_result(ir_texture *ir, fs_reg orig_val, uint32_t sampler);
-
-   fs_inst *emit(fs_inst *inst);
-   void emit(exec_list list);
-
-   fs_inst *emit(enum opcode opcode);
-   fs_inst *emit(enum opcode opcode, const fs_reg &dst);
-   fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0);
-   fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-                 const fs_reg &src1);
-   fs_inst *emit(enum opcode opcode, const fs_reg &dst,
-                 const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
-   fs_inst *emit(enum opcode opcode, const fs_reg &dst,
-                 fs_reg src[], int sources);
-
-   fs_inst *MOV(const fs_reg &dst, const fs_reg &src);
-   fs_inst *NOT(const fs_reg &dst, const fs_reg &src);
-   fs_inst *RNDD(const fs_reg &dst, const fs_reg &src);
-   fs_inst *RNDE(const fs_reg &dst, const fs_reg &src);
-   fs_inst *RNDZ(const fs_reg &dst, const fs_reg &src);
-   fs_inst *FRC(const fs_reg &dst, const fs_reg &src);
-   fs_inst *ADD(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *MUL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *MACH(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *MAC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *SHL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *SHR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *ASR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *AND(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *OR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *XOR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *IF(enum brw_predicate predicate);
-   fs_inst *IF(const fs_reg &src0, const fs_reg &src1,
-               enum brw_conditional_mod condition);
-   fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1,
-                enum brw_conditional_mod condition);
-   fs_inst *LRP(const fs_reg &dst, const fs_reg &a, const fs_reg &y,
-                const fs_reg &x);
-   fs_inst *DEP_RESOLVE_MOV(int grf);
-   fs_inst *BFREV(const fs_reg &dst, const fs_reg &value);
-   fs_inst *BFE(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset,
-                const fs_reg &value);
-   fs_inst *BFI1(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset);
-   fs_inst *BFI2(const fs_reg &dst, const fs_reg &bfi1_dst,
-                 const fs_reg &insert, const fs_reg &base);
-   fs_inst *FBH(const fs_reg &dst, const fs_reg &value);
-   fs_inst *FBL(const fs_reg &dst, const fs_reg &value);
-   fs_inst *CBIT(const fs_reg &dst, const fs_reg &value);
-   fs_inst *MAD(const fs_reg &dst, const fs_reg &c, const fs_reg &b,
-                const fs_reg &a);
-   fs_inst *ADDC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *SUBB(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *SEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-
-   int type_size(const struct glsl_type *type);
-   fs_inst *get_instruction_generating_reg(fs_inst *start,
-                                           fs_inst *end,
-                                           const fs_reg &reg);
-
-   fs_inst *LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources);
-
-   exec_list VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
-                                        const fs_reg &surf_index,
-                                        const fs_reg &varying_offset,
-                                        uint32_t const_offset);
+   dst_reg
+   temporary_reg(const glsl_type *type)
+   {
+      return bld.scalar_reg(brw_type_for_base_type(type),
+                            type_size(type));
+   }
+
+   instruction *DEP_RESOLVE_MOV(int grf);
 
    bool run();
    void assign_binding_table_offsets();
@@ -233,15 +153,10 @@ public:
                                                fs_inst *inst);
    void insert_gen4_post_send_dependency_workarounds(bblock_t *block,
                                                      fs_inst *inst);
-   void vfail(const char *msg, va_list args);
-   void fail(const char *msg, ...);
    void no16(const char *msg, ...);
    void lower_uniform_pull_constant_loads();
    bool lower_load_payload();
 
-   void push_force_uncompressed();
-   void pop_force_uncompressed();
-
    void emit_dummy_fs();
    void emit_repclear_shader();
    fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
@@ -255,34 +170,26 @@ public:
    void emit_interpolation_setup_gen4();
    void emit_interpolation_setup_gen6();
    void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
-   fs_reg rescale_texcoord(ir_texture *ir, fs_reg coordinate,
-                           bool is_rect, uint32_t sampler, int texunit);
    fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
-                              fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
-                              uint32_t sampler);
-   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
-                              fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
-                              fs_reg sample_index, uint32_t sampler);
-   fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
-                              fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
-                              fs_reg sample_index, fs_reg mcs, fs_reg sampler);
-   fs_reg emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, fs_reg sampler);
-   void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
-   fs_reg fix_math_operand(fs_reg src);
-   fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
-   fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
-   void emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y,
-                 const fs_reg &a);
-   void emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
-                    const fs_reg &src0, const fs_reg &src1);
-   bool try_emit_saturate(ir_expression *ir);
-   bool try_emit_mad(ir_expression *ir);
+                              const fs_reg &shadow_c, fs_reg lod, fs_reg lod2,
+                              const fs_reg &sampler);
+   fs_inst *emit_texture_gen5(ir_texture *ir, const fs_reg &dst, fs_reg coordinate,
+                              const fs_reg &shadow_c, fs_reg lod, fs_reg lod2,
+                              const fs_reg &sample_index, const fs_reg &sampler);
+   fs_inst *emit_texture_gen7(ir_texture *ir, const fs_reg &dst, fs_reg coordinate,
+                              const fs_reg &shadow_c, fs_reg lod, fs_reg lod2,
+                              fs_reg offset_val, const fs_reg &sample_index,
+                              const fs_reg &mcs, const fs_reg &sampler);
+   fs_inst *emit_texture(ir_texture *ir, const fs_reg &dst,
+                         const fs_reg &coordinate, const fs_reg &shadow_c,
+                         const fs_reg &lod, const fs_reg &lod2,
+                         const fs_reg &offset_val, const fs_reg &sample_index,
+                         const fs_reg &mcs, const fs_reg &sampler);
+   fs_reg emit_untyped_surface_header();
    void try_replace_with_sel();
    bool opt_peephole_sel();
    bool opt_peephole_predicated_break();
    bool opt_saturate_propagation();
-   void emit_bool_to_cond_code(ir_rvalue *condition);
-   void emit_if_gen6(ir_if *ir);
    void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
                      uint32_t spill_offset, int count);
    void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,
@@ -317,59 +224,45 @@ public:
                             fs_reg src0_alpha, unsigned components);
    void emit_fb_writes();
 
-   void emit_shader_time_begin();
-   void emit_shader_time_end();
-   void emit_shader_time_write(enum shader_time_shader_type type,
-                               fs_reg value);
-
-   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-                            fs_reg dst, fs_reg offset, fs_reg src0,
-                            fs_reg src1);
+   void emit_interpolate_expression(ir_expression *ir);
 
-   void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
-                                  fs_reg offset);
+   void emit_pull_constant_load(brw::fs_builder &bld,
+                                const dst_reg &dst,
+                                const src_reg &surf_index,
+                                uint32_t offset,
+                                const src_reg *reladdr,
+                                unsigned num_components);
 
-   void emit_interpolate_expression(ir_expression *ir);
+   struct brw_reg interp_reg(int location, int channel);
 
-   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
-                               fs_reg dst,
-                               fs_reg src,
-                               fs_inst *pre_rhs_inst,
-                               fs_inst *last_rhs_inst);
-   void emit_assignment_writes(fs_reg &l, fs_reg &r,
-                               const glsl_type *type, bool predicated);
-   void resolve_ud_negate(fs_reg *reg);
-   void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg);
+   void emit_pack_half_2x16(dst_reg dst, src_reg src0)
+   {
+      unreachable("not reached");
+   }
 
-   fs_reg get_timestamp();
+   void emit_unpack_half_2x16(dst_reg dst, src_reg src0)
+   {
+      unreachable("not reached");
+   }
 
-   struct brw_reg interp_reg(int location, int channel);
-   void setup_uniform_values(ir_variable *ir);
-   void setup_builtin_uniform_values(ir_variable *ir);
-   int implied_mrf_writes(fs_inst *inst);
+   const struct brw_sampler_prog_key_data *
+   sampler_prog_key() const {
+      return &((const brw_wm_prog_key *)key)->tex;
+   }
 
    virtual void dump_instructions();
    virtual void dump_instructions(const char *name);
    void dump_instruction(backend_instruction *inst);
    void dump_instruction(backend_instruction *inst, FILE *file);
 
-   void visit_atomic_counter_intrinsic(ir_call *ir);
-
    const void *const key;
    struct brw_stage_prog_data *prog_data;
    unsigned int sanity_param_count;
 
-   int *param_size;
-
-   int *virtual_grf_start;
-   int *virtual_grf_end;
    brw::fs_live_variables *live_intervals;
 
    int *regs_live_at_ip;
 
-   /** Number of uniform variable components visited. */
-   unsigned uniforms;
-
    /** Byte-offset for the next available spot in the scratch space buffer. */
    unsigned last_scratch;
 
@@ -385,33 +278,19 @@ public:
    */
   int *push_constant_loc;
 
-   struct hash_table *variable_ht;
   fs_reg frag_depth;
   fs_reg sample_mask;
   fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
   unsigned output_components[BRW_MAX_DRAW_BUFFERS];
   fs_reg dual_src_output;
   bool do_dual_src;
-   int first_non_payload_grf;
-   /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
-   unsigned max_grf;
 
   fs_reg *fp_temp_regs;
   fs_reg *fp_input_regs;
 
-   /** @{ debug annotation info */
-   const char *current_annotation;
-   const void *base_ir;
-   /** @} */
-
-   bool failed;
-   char *fail_msg;
   bool simd16_unsupported;
   char *no16_msg;
 
-   /* Result of last visit() method. */
-   fs_reg result;
-
   /** Register numbers for thread payload fields. */
   struct {
      uint8_t source_depth_reg;
@@ -435,14 +314,11 @@ public:
   fs_reg pixel_w;
   fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
   fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
-   fs_reg shader_start_time;
 
   unsigned grf_used;
   bool spilled_any_registers;
 
   const unsigned dispatch_width; /**< 8 or 16 */
-
-   int force_uncompressed_stack;
 };
 
 /**
@@ -486,7 +362,8 @@ private:
                       struct brw_reg src1);
    void generate_math_gen4(fs_inst *inst,
                            struct brw_reg dst,
-                           struct brw_reg src);
+                           struct brw_reg src0,
+                           struct brw_reg src1);
    void generate_math_g45(fs_inst *inst,
                           struct brw_reg dst,
                           struct brw_reg src);
@@ -540,11 +417,6 @@ private:
                             struct brw_reg dst,
                             struct brw_reg src);
 
-   void generate_shader_time_add(fs_inst *inst,
-                                 struct brw_reg payload,
-                                 struct brw_reg offset,
-                                 struct brw_reg value);
-
    void generate_untyped_atomic(fs_inst *inst,
                                 struct brw_reg dst,
                                 struct brw_reg payload,
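[Editor's note] The header change above makes fs_visitor derive from a templated base, brw::backend_visitor<fs_visitor, brw::fs_builder>, parameterized on the concrete visitor and its builder. That is the curiously recurring template pattern: shared visiting code can call stage-specific hooks such as temporary_reg() without virtual dispatch. A toy, self-contained sketch of the pattern (all names here are stand-ins, not the driver's real classes):

// Toy CRTP sketch: shared code calls back into the concrete visitor.
#include <cstdio>

struct reg { int nr; };

struct fs_builder {
   int next = 0;
   reg alloc() { return reg{next++}; }
   void emit_mov(reg dst, int imm) { std::printf("mov r%d, %d\n", dst.nr, imm); }
};

template <typename V, typename Builder>
class backend_visitor {
public:
   void visit_constant(int value) {
      reg tmp = self()->temporary_reg(); // stage-specific hook, no vtable
      bld.emit_mov(tmp, value);
   }
protected:
   Builder bld;
private:
   V *self() { return static_cast<V *>(this); }
};

class my_fs_visitor : public backend_visitor<my_fs_visitor, fs_builder> {
public:
   reg temporary_reg() { return bld.alloc(); }
};

int main() {
   my_fs_visitor v;
   v.visit_constant(42); // prints "mov r0, 42"
}

The same shape lets the shared base carry the builder (bld) and debug state that the FS and VEC4 visitors previously duplicated.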
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index e1989cb5e4c..99a412a85b7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -302,7 +302,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
        (entry->dst.reg_offset + entry->regs_written) * 32)
       return false;
 
-   /* See resolve_ud_negate() and comment in brw_fs_emit.cpp. */
+   /* See fix_condmod_negate() and comment in brw_fs_emit.cpp. */
    if (inst->conditional_mod &&
        inst->src[arg].type == BRW_REGISTER_TYPE_UD &&
        entry->src.negate)
@@ -381,7 +381,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
       break;
 
    case GRF:
      {
-         assert(entry->src.width % inst->src[arg].width == 0);
+         assert(entry->src.width % inst->src[arg].width == 0 ||
+                entry->src.width == 1);
        /* In this case, we'll just leave the width alone.  The source
         * register could have different widths depending on how it is
         * being used.  For instance, if only half of the register was
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 87f67564657..0aeb67c0900 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -211,18 +211,18 @@ fs_visitor::opt_cse_local(bblock_t *block)
          entry->tmp = tmp;
          entry->generator->dst = tmp;
 
-         fs_inst *copy;
+         brw::fs_builder ibld = bld.at(block,
+                                       (fs_inst *)entry->generator->next);
          if (written > dst_width) {
             fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written / dst_width);
             for (int i = 0; i < written / dst_width; i++)
                sources[i] = offset(tmp, i);
-            copy = LOAD_PAYLOAD(orig_dst, sources, written / dst_width);
+            ibld.LOAD_PAYLOAD(orig_dst, sources, written / dst_width);
         } else {
-            copy = MOV(orig_dst, tmp);
-            copy->force_writemask_all =
+            ibld.MOV(orig_dst, tmp)
+               ->force_writemask_all =
                entry->generator->force_writemask_all;
         }
-         entry->generator->insert_after(block, copy);
      }
 
      /* dest <- temp */
@@ -234,17 +234,16 @@ fs_visitor::opt_cse_local(bblock_t *block)
            assert(inst->dst.type == entry->tmp.type);
            fs_reg dst = inst->dst;
            fs_reg tmp = entry->tmp;
-            fs_inst *copy;
+            brw::fs_builder ibld = bld.at(block, inst);
            if (written > dst_width) {
               fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written / dst_width);
               for (int i = 0; i < written / dst_width; i++)
                  sources[i] = offset(tmp, i);
-               copy = LOAD_PAYLOAD(dst, sources, written / dst_width);
+               ibld.LOAD_PAYLOAD(dst, sources, written / dst_width);
            } else {
-               copy = MOV(dst, tmp);
-               copy->force_writemask_all = inst->force_writemask_all;
+               ibld.MOV(dst, tmp)
+                  ->force_writemask_all = inst->force_writemask_all;
            }
-            inst->insert_before(block, copy);
         }
 
         /* Set our iterator so that next time through the loop inst->next
fs_reg src) { fs_reg temp = fs_reg(this, glsl_type::float_type); - emit_math(opcode, temp, src); + bld.emit_math(opcode, temp, src); emit_fp_scalar_write(fpi, dst, temp); } @@ -126,11 +126,11 @@ fs_visitor::emit_fragment_program_code() * mov.f0 dst 1.0 */ fs_reg one = fs_reg(this, glsl_type::float_type); - emit(MOV(one, fs_reg(1.0f))); + bld.MOV(one, fs_reg(1.0f)); for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) { const struct prog_instruction *fpi = &prog->Instructions[insn]; - base_ir = fpi; + bld.set_base_ir(fpi); //_mesa_print_instruction(fpi); @@ -161,10 +161,10 @@ fs_visitor::emit_fragment_program_code() if (fpi->DstReg.WriteMask & (1 << i)) { fs_inst *inst; - emit(CMP(reg_null_f, offset(src[0], i), fs_reg(0.0f), - BRW_CONDITIONAL_L)); + bld.CMP(bld.reg_null_f(), offset(src[0], i), fs_reg(0.0f), + BRW_CONDITIONAL_L); - inst = emit(BRW_OPCODE_SEL, offset(dst, i), + inst = bld.emit(BRW_OPCODE_SEL, offset(dst, i), offset(src[1], i), offset(src[2], i)); inst->predicate = BRW_PREDICATE_NORMAL; } @@ -191,14 +191,14 @@ fs_visitor::emit_fragment_program_code() default: unreachable("not reached"); } - emit(MUL(acc, offset(src[0], 0), offset(src[1], 0))); + bld.MUL(acc, offset(src[0], 0), offset(src[1], 0)); for (int i = 1; i < count; i++) { - emit(MUL(mul, offset(src[0], i), offset(src[1], i))); - emit(ADD(acc, acc, mul)); + bld.MUL(mul, offset(src[0], i), offset(src[1], i)); + bld.ADD(acc, acc, mul); } if (fpi->Opcode == OPCODE_DPH) - emit(ADD(acc, acc, offset(src[1], 3))); + bld.ADD(acc, acc, offset(src[1], 3)); emit_fp_scalar_write(fpi, dst, acc); break; @@ -206,15 +206,15 @@ fs_visitor::emit_fragment_program_code() case OPCODE_DST: if (fpi->DstReg.WriteMask & WRITEMASK_X) - emit(MOV(dst, fs_reg(1.0f))); + bld.MOV(dst, fs_reg(1.0f)); if (fpi->DstReg.WriteMask & WRITEMASK_Y) { - emit(MUL(offset(dst, 1), - offset(src[0], 1), offset(src[1], 1))); + bld.MUL(offset(dst, 1), + offset(src[0], 1), offset(src[1], 1)); } if (fpi->DstReg.WriteMask & WRITEMASK_Z) - emit(MOV(offset(dst, 2), offset(src[0], 2))); + bld.MOV(offset(dst, 2), offset(src[0], 2)); if (fpi->DstReg.WriteMask & WRITEMASK_W) - emit(MOV(offset(dst, 3), offset(src[1], 3))); + bld.MOV(offset(dst, 3), offset(src[1], 3)); break; case OPCODE_EX2: @@ -248,8 +248,8 @@ fs_visitor::emit_fragment_program_code() * undiscarded pixels, and updates just those pixels to be * turned off. */ - fs_inst *cmp = emit(CMP(reg_null_f, offset(src[0], i), - fs_reg(0.0f), BRW_CONDITIONAL_GE)); + fs_inst *cmp = bld.CMP(bld.reg_null_f(), offset(src[0], i), + fs_reg(0.0f), BRW_CONDITIONAL_GE); cmp->predicate = BRW_PREDICATE_NORMAL; cmp->flag_subreg = 1; } @@ -277,30 +277,30 @@ fs_visitor::emit_fragment_program_code() * brw_wm_emit.c either. 
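The DP2/DP3/DP4/DPH cases above lower a dot product to one seeding MUL followed by MUL+ADD pairs, with DPH tacking the homogeneous w term onto the accumulator. A minimal standalone sketch of that expansion, with plain floats standing in for fs_regs:

// Standalone model of the DP2/DP3/DP4/DPH lowering above: one MUL seeds
// the accumulator, then a MUL+ADD pair handles each remaining component,
// plus the extra "+ src1.w" that DPH appends.
#include <cstdio>

static float emit_dot(const float *src0, const float *src1, int count,
                      bool dph)
{
   float acc = src0[0] * src1[0];          // MUL(acc, src0.x, src1.x)
   for (int i = 1; i < count; i++) {
      float mul = src0[i] * src1[i];       // MUL(mul, src0.i, src1.i)
      acc = acc + mul;                     // ADD(acc, acc, mul)
   }
   if (dph)
      acc = acc + src1[3];                 // DPH: homogeneous w term
   return acc;
}

int main()
{
   const float a[4] = { 1, 2, 3, 1 };
   const float b[4] = { 4, 5, 6, 7 };
   printf("DP3 = %f, DPH = %f\n", emit_dot(a, b, 3, false),
          emit_dot(a, b, 3, true));        // prints 32 and 39
}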
*/ if (fpi->DstReg.WriteMask & WRITEMASK_X) - emit(MOV(offset(dst, 0), fs_reg(1.0f))); + bld.MOV(offset(dst, 0), fs_reg(1.0f)); if (fpi->DstReg.WriteMask & WRITEMASK_YZ) { fs_inst *inst; - emit(CMP(reg_null_f, offset(src[0], 0), fs_reg(0.0f), - BRW_CONDITIONAL_LE)); + bld.CMP(bld.reg_null_f(), offset(src[0], 0), fs_reg(0.0f), + BRW_CONDITIONAL_LE); if (fpi->DstReg.WriteMask & WRITEMASK_Y) { - emit(MOV(offset(dst, 1), offset(src[0], 0))); - inst = emit(MOV(offset(dst, 1), fs_reg(0.0f))); + bld.MOV(offset(dst, 1), offset(src[0], 0)); + inst = bld.MOV(offset(dst, 1), fs_reg(0.0f)); inst->predicate = BRW_PREDICATE_NORMAL; } if (fpi->DstReg.WriteMask & WRITEMASK_Z) { - emit_math(SHADER_OPCODE_POW, offset(dst, 2), - offset(src[0], 1), offset(src[0], 3)); + bld.emit_math(SHADER_OPCODE_POW, offset(dst, 2), + offset(src[0], 1), offset(src[0], 3)); - inst = emit(MOV(offset(dst, 2), fs_reg(0.0f))); + inst = bld.MOV(offset(dst, 2), fs_reg(0.0f)); inst->predicate = BRW_PREDICATE_NORMAL; } } if (fpi->DstReg.WriteMask & WRITEMASK_W) - emit(MOV(offset(dst, 3), fs_reg(1.0f))); + bld.MOV(offset(dst, 3), fs_reg(1.0f)); break; @@ -310,7 +310,7 @@ fs_visitor::emit_fragment_program_code() fs_reg a = offset(src[0], i); fs_reg y = offset(src[1], i); fs_reg x = offset(src[2], i); - emit_lrp(offset(dst, i), x, y, a); + bld.LRP(offset(dst, i), x, y, a); } } break; @@ -319,8 +319,8 @@ fs_visitor::emit_fragment_program_code() for (int i = 0; i < 4; i++) { if (fpi->DstReg.WriteMask & (1 << i)) { fs_reg temp = fs_reg(this, glsl_type::float_type); - emit(MUL(temp, offset(src[0], i), offset(src[1], i))); - emit(ADD(offset(dst, i), temp, offset(src[2], i))); + bld.MUL(temp, offset(src[0], i), offset(src[1], i)); + bld.ADD(offset(dst, i), temp, offset(src[2], i)); } } break; @@ -343,7 +343,7 @@ fs_visitor::emit_fragment_program_code() case OPCODE_POW: { fs_reg temp = fs_reg(this, glsl_type::float_type); - emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]); + bld.emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]); emit_fp_scalar_write(fpi, dst, temp); break; } @@ -358,13 +358,13 @@ fs_visitor::emit_fragment_program_code() case OPCODE_SCS: if (fpi->DstReg.WriteMask & WRITEMASK_X) { - emit_math(SHADER_OPCODE_COS, offset(dst, 0), - offset(src[0], 0)); + bld.emit_math(SHADER_OPCODE_COS, offset(dst, 0), + offset(src[0], 0)); } if (fpi->DstReg.WriteMask & WRITEMASK_Y) { - emit_math(SHADER_OPCODE_SIN, offset(dst, 1), - offset(src[0], 1)); + bld.emit_math(SHADER_OPCODE_SIN, offset(dst, 1), + offset(src[0], 1)); } break; @@ -414,10 +414,10 @@ fs_visitor::emit_fragment_program_code() coordinate = fs_reg(this, glsl_type::vec3_type); fs_reg invproj = fs_reg(this, glsl_type::float_type); - emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3)); + bld.emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3)); for (int i = 0; i < 3; i++) { - emit(MUL(offset(coordinate, i), - offset(src[0], i), invproj)); + bld.MUL(offset(coordinate, i), + offset(src[0], i), invproj); } break; } @@ -457,14 +457,14 @@ fs_visitor::emit_fragment_program_code() fs_reg abscoord = coordinate; abscoord.negate = false; abscoord.abs = true; - emit_minmax(BRW_CONDITIONAL_GE, temp, - offset(abscoord, 0), offset(abscoord, 1)); - emit_minmax(BRW_CONDITIONAL_GE, temp, - temp, offset(abscoord, 2)); - emit_math(SHADER_OPCODE_RCP, temp, temp); + bld.emit_minmax(BRW_CONDITIONAL_GE, temp, + offset(abscoord, 0), offset(abscoord, 1)); + bld.emit_minmax(BRW_CONDITIONAL_GE, temp, + temp, offset(abscoord, 2)); + bld.emit_math(SHADER_OPCODE_RCP, temp, temp); for (int i = 0; i < 3; 
i++) { - emit(MUL(offset(cubecoord, i), - offset(coordinate, i), temp)); + bld.MUL(offset(cubecoord, i), + offset(coordinate, i), temp); } coordinate = cubecoord; @@ -485,15 +485,9 @@ fs_visitor::emit_fragment_program_code() fpi->TexSrcTarget == TEXTURE_RECT_INDEX, fpi->TexSrcUnit, fpi->TexSrcUnit); - fs_inst *inst; - if (brw->gen >= 7) { - inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index, fs_reg(0u), fs_reg(fpi->TexSrcUnit)); - } else if (brw->gen >= 5) { - inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index, fpi->TexSrcUnit); - } else { - inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy, fpi->TexSrcUnit); - } - + fs_inst *inst = emit_texture(ir, dst, coordinate, shadow_c, lod, dpdy, + fs_reg(), sample_index, fs_reg(0u), + fs_reg(fpi->TexSrcUnit)); inst->shadow_compare = fpi->TexShadow; /* Reuse the GLSL swizzle_result() handler. */ @@ -519,10 +513,10 @@ fs_visitor::emit_fragment_program_code() fs_reg temp = fs_reg(this, glsl_type::float_type); fs_reg neg_src1_1 = offset(src[1], i1); neg_src1_1.negate = !neg_src1_1.negate; - emit(MUL(temp, offset(src[0], i2), neg_src1_1)); - emit(MUL(offset(dst, i), - offset(src[0], i1), offset(src[1], i2))); - emit(ADD(offset(dst, i), offset(dst, i), temp)); + bld.MUL(temp, offset(src[0], i2), neg_src1_1); + bld.MUL(offset(dst, i), + offset(src[0], i1), offset(src[1], i2)); + bld.ADD(offset(dst, i), offset(dst, i), temp); } } break; @@ -543,8 +537,8 @@ fs_visitor::emit_fragment_program_code() for (int i = 0; i < 4; i++) { if (fpi->DstReg.WriteMask & (1 << i)) { - fs_inst *inst = emit(MOV(offset(real_dst, i), - offset(dst, i))); + fs_inst *inst = bld.MOV(offset(real_dst, i), + offset(dst, i)); inst->saturate = fpi->SaturateMode; } } @@ -556,10 +550,10 @@ fs_visitor::emit_fragment_program_code() * Fragment depth has this strange convention of being the .z component of * a vec4. emit_fb_write() wants to see a float value, instead. 
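The texture hunk above collapses the per-generation emit_texture_gen7/gen5/gen4 selection at the call site into a single emit_texture() entry point. A hedged sketch of that dispatch shape, using hypothetical stand-in types rather than Mesa's:

// Minimal model of folding per-generation texture emission behind one
// entry point, as the hunk above does with emit_texture().  The names
// here are illustrative stand-ins, not Mesa's API.
#include <cstdio>

struct sample_params { float coord[3]; float lod; };

static void emit_gen4(const sample_params &) { puts("gen4 message"); }
static void emit_gen5(const sample_params &) { puts("gen5 message"); }
static void emit_gen7(const sample_params &) { puts("gen7 message"); }

// Callers no longer pick a generation; the dispatch lives in one place.
static void emit_texture(int gen, const sample_params &p)
{
   if (gen >= 7)
      emit_gen7(p);
   else if (gen >= 5)
      emit_gen5(p);
   else
      emit_gen4(p);
}

int main() { emit_texture(6, sample_params{}); }  // prints "gen5 message"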
*/ - this->current_annotation = "result.depth write"; + bld.set_annotation("result.depth write"); if (frag_depth.file != BAD_FILE) { fs_reg temp = fs_reg(this, glsl_type::float_type); - emit(MOV(temp, offset(frag_depth, 2))); + bld.MOV(temp, offset(frag_depth, 2)); frag_depth = temp; } } @@ -595,8 +589,8 @@ fs_visitor::setup_fp_regs() ir_var_shader_in); ir->data.location = i; - this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d", - i); + bld.set_annotation(ralloc_asprintf(ctx, "interpolate input %d", + i)); switch (i) { case VARYING_SLOT_POS: @@ -615,15 +609,15 @@ fs_visitor::setup_fp_regs() fp_input_regs[i] = *emit_general_interpolation(ir); if (i == VARYING_SLOT_FOGC) { - emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f))); - emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f))); - emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f))); + bld.MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f)); + bld.MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f)); + bld.MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f)); } break; } - this->current_annotation = NULL; + bld.set_annotation(NULL); } } } @@ -708,8 +702,8 @@ fs_visitor::get_fp_src_reg(const prog_src_register *src) result = fs_reg(this, glsl_type::vec4_type); for (int i = 0; i < 4; i++) { - emit(MOV(offset(result, i), - fs_reg(plist->ParameterValues[src->Index][i].f))); + bld.MOV(offset(result, i), + fs_reg(plist->ParameterValues[src->Index][i].f)); } break; } @@ -742,15 +736,15 @@ fs_visitor::get_fp_src_reg(const prog_src_register *src) */ int src_swiz = GET_SWZ(src->Swizzle, i); if (src_swiz == SWIZZLE_ZERO) { - emit(MOV(offset(result, i), fs_reg(0.0f))); + bld.MOV(offset(result, i), fs_reg(0.0f)); } else if (src_swiz == SWIZZLE_ONE) { - emit(MOV(offset(result, i), - negate ? fs_reg(-1.0f) : fs_reg(1.0f))); + bld.MOV(offset(result, i), + negate ? fs_reg(-1.0f) : fs_reg(1.0f)); } else { fs_reg src = offset(unswizzled, src_swiz); if (negate) src.negate = !src.negate; - emit(MOV(offset(result, i), src)); + bld.MOV(offset(result, i), src); } } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index c2010c036c9..ee3eec4a665 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -329,28 +329,51 @@ fs_generator::generate_math_gen6(fs_inst *inst, void fs_generator::generate_math_gen4(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src) + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { + /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 + * "Message Payload": + * + * "Operand0[7]. For the INT DIV functions, this operand is the + * denominator." + * ... + * "Operand1[7]. For the INT DIV functions, this operand is the + * numerator." + */ + bool is_int_div = (inst->opcode == SHADER_OPCODE_INT_QUOTIENT || + inst->opcode == SHADER_OPCODE_INT_REMAINDER); + struct brw_reg &op0 = is_int_div ? src1 : src0; + struct brw_reg &op1 = is_int_div ? 
src0 : src1; int op = brw_math_function(inst->opcode); assert(inst->mlen >= 1); + if (src1.file != BRW_ARCHITECTURE_REGISTER_FILE) { + brw_push_insn_state(p); + brw_set_default_saturate(p, false); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1); + brw_pop_insn_state(p); + } + if (dispatch_width == 8) { gen4_math(p, dst, op, - inst->base_mrf, src, + inst->base_mrf, op0, BRW_MATH_PRECISION_FULL); + } else if (dispatch_width == 16) { brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); gen4_math(p, firsthalf(dst), op, - inst->base_mrf, firsthalf(src), + inst->base_mrf, firsthalf(op0), BRW_MATH_PRECISION_FULL); brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF); gen4_math(p, sechalf(dst), op, - inst->base_mrf + 1, sechalf(src), + inst->base_mrf + 1, sechalf(op0), BRW_MATH_PRECISION_FULL); brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); @@ -362,13 +385,9 @@ fs_generator::generate_math_g45(fs_inst *inst, struct brw_reg dst, struct brw_reg src) { - if (inst->opcode == SHADER_OPCODE_POW || - inst->opcode == SHADER_OPCODE_INT_QUOTIENT || - inst->opcode == SHADER_OPCODE_INT_REMAINDER) { - generate_math_gen4(inst, dst, src); - return; - } - + assert(inst->opcode != SHADER_OPCODE_POW && + inst->opcode != SHADER_OPCODE_INT_QUOTIENT && + inst->opcode != SHADER_OPCODE_INT_REMAINDER); int op = brw_math_function(inst->opcode); assert(inst->mlen >= 1); @@ -1442,45 +1461,6 @@ fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, } void -fs_generator::generate_shader_time_add(fs_inst *inst, - struct brw_reg payload, - struct brw_reg offset, - struct brw_reg value) -{ - assert(brw->gen >= 7); - brw_push_insn_state(p); - brw_set_default_mask_control(p, true); - - assert(payload.file == BRW_GENERAL_REGISTER_FILE); - struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0), - offset.type); - struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0), - value.type); - - assert(offset.file == BRW_IMMEDIATE_VALUE); - if (value.file == BRW_GENERAL_REGISTER_FILE) { - value.width = BRW_WIDTH_1; - value.hstride = BRW_HORIZONTAL_STRIDE_0; - value.vstride = BRW_VERTICAL_STRIDE_0; - } else { - assert(value.file == BRW_IMMEDIATE_VALUE); - } - - /* Trying to deal with setup of the params from the IR is crazy in the FS8 - * case, and we don't really care about squeezing every bit of performance - * out of this path, so we just emit the MOVs from here. 
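The generate_math_gen4() hunk above swaps its sources for the INT DIV functions because, per the PRM excerpt it quotes, operand 0 carries the denominator and operand 1 the numerator. A standalone sketch of just that selection logic:

// Standalone model of the operand swap above: the Gen4 INT DIV message
// wants the denominator in operand 0 and the numerator in operand 1,
// the reverse of the IR's usual src0/src1 order.
#include <cassert>
#include <cstdio>

enum math_op { MATH_POW, MATH_INT_QUOTIENT, MATH_INT_REMAINDER };

static void pick_operands(math_op op, int src0, int src1,
                          int *op0, int *op1)
{
   const bool is_int_div = (op == MATH_INT_QUOTIENT ||
                            op == MATH_INT_REMAINDER);
   *op0 = is_int_div ? src1 : src0;   // denominator first for INT DIV
   *op1 = is_int_div ? src0 : src1;   // numerator second
}

int main()
{
   int op0, op1;
   pick_operands(MATH_INT_QUOTIENT, /*numerator*/ 7, /*denominator*/ 2,
                 &op0, &op1);
   assert(op0 == 2 && op1 == 7);
   printf("op0=%d op1=%d\n", op0, op1);
}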
- */ - brw_MOV(p, payload_offset, offset); - brw_MOV(p, payload_value, value); - brw_shader_time_add(p, payload, - prog_data->binding_table.shader_time_start); - brw_pop_insn_state(p); - - brw_mark_surface_used(prog_data, - prog_data->binding_table.shader_time_start); -} - -void fs_generator::generate_untyped_atomic(fs_inst *inst, struct brw_reg dst, struct brw_reg payload, struct brw_reg atomic_op, @@ -1805,7 +1785,7 @@ fs_generator::generate_code(const cfg_t *cfg) } else if (brw->gen == 5 || brw->is_g4x) { generate_math_g45(inst, dst, src[0]); } else { - generate_math_gen4(inst, dst, src[0]); + generate_math_gen4(inst, dst, src[0], brw_null_reg()); } break; case SHADER_OPCODE_INT_QUOTIENT: @@ -1817,7 +1797,7 @@ fs_generator::generate_code(const cfg_t *cfg) } else if (brw->gen >= 6) { generate_math_gen6(inst, dst, src[0], src[1]); } else { - generate_math_gen4(inst, dst, src[0]); + generate_math_gen4(inst, dst, src[0], src[1]); } break; case FS_OPCODE_PIXEL_X: @@ -1905,7 +1885,10 @@ fs_generator::generate_code(const cfg_t *cfg) break; case SHADER_OPCODE_SHADER_TIME_ADD: - generate_shader_time_add(inst, src[0], src[1], src[2]); + brw_shader_time_add(p, src[0], + prog_data->binding_table.shader_time_start); + brw_mark_surface_used(prog_data, + prog_data->binding_table.shader_time_start); break; case SHADER_OPCODE_UNTYPED_ATOMIC: diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp index b7a1d7e7722..7e7371f3a8e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp @@ -85,8 +85,8 @@ fs_visitor::opt_peephole_predicated_break() * instruction to set the flag register. */ if (brw->gen == 6 && if_inst->conditional_mod) { - fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod); + fs_inst *cmp_inst = bld.CMP(bld.reg_null_d(), if_inst->src[0], if_inst->src[1], + if_inst->conditional_mod); if_inst->insert_before(if_block, cmp_inst); jump_inst->predicate = BRW_PREDICATE_NORMAL; } else { diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 833ba15b1b6..b792b03e5e0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -666,7 +666,7 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src, for (int i = 0; i < count / reg_size; i++) { fs_inst *spill_inst = new(mem_ctx) fs_inst(SHADER_OPCODE_GEN4_SCRATCH_WRITE, - reg_null_f, src); + bld.reg_null_f(), src); src.reg_offset += reg_size; spill_inst->offset = spill_offset + i * reg_size; spill_inst->ir = inst->ir; diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index c3bfd00e70d..c3e96a6e31a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -66,7 +66,8 @@ count_movs_from_if(fs_inst *then_mov[MAX_MOVS], fs_inst *else_mov[MAX_MOVS], { int then_movs = 0; foreach_inst_in_block(fs_inst, inst, then_block) { - if (then_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV) + if (then_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV || + inst->is_partial_write()) break; then_mov[then_movs] = inst; @@ -75,7 +76,12 @@ count_movs_from_if(fs_inst *then_mov[MAX_MOVS], fs_inst *else_mov[MAX_MOVS], int else_movs = 0; foreach_inst_in_block(fs_inst, inst, else_block) { - if 
(else_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV) + if (else_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV || + /* Check that the MOVs are the right form. */ + !then_mov[else_movs] || !then_mov[else_movs]->dst.equals(inst->dst) || + /* Check that source types for mov operations match. */ + then_mov[else_movs]->src[0].type != inst->src[0].type || + inst->is_partial_write()) break; else_mov[else_movs] = inst; @@ -148,13 +154,13 @@ fs_visitor::opt_peephole_sel() if (movs == 0) continue; - fs_inst *sel_inst[MAX_MOVS] = { NULL }; - fs_inst *mov_imm_inst[MAX_MOVS] = { NULL }; - + brw::fs_builder ibld = bld.at(block, if_inst); enum brw_predicate predicate; bool predicate_inverse; if (brw->gen == 6 && if_inst->conditional_mod) { - /* For Sandybridge with IF with embedded comparison */ + /* For Sandybridge with IF with embedded comparison. */ + ibld.CMP(ibld.reg_null_d(), if_inst->src[0], if_inst->src[1], + if_inst->conditional_mod); predicate = BRW_PREDICATE_NORMAL; predicate_inverse = false; } else { @@ -165,25 +171,8 @@ fs_visitor::opt_peephole_sel() /* Generate SEL instructions for pairs of MOVs to a common destination. */ for (int i = 0; i < movs; i++) { - if (!then_mov[i] || !else_mov[i]) - break; - - /* Check that the MOVs are the right form. */ - if (!then_mov[i]->dst.equals(else_mov[i]->dst) || - then_mov[i]->is_partial_write() || - else_mov[i]->is_partial_write()) { - movs = i; - break; - } - - /* Check that source types for mov operations match. */ - if (then_mov[i]->src[0].type != else_mov[i]->src[0].type) { - movs = i; - break; - } - if (then_mov[i]->src[0].equals(else_mov[i]->src[0])) { - sel_inst[i] = MOV(then_mov[i]->dst, then_mov[i]->src[0]); + ibld.MOV(then_mov[i]->dst, then_mov[i]->src[0]); } else { /* Only the last source register can be a constant, so if the MOV * in the "then" clause uses a constant, we need to put it in a @@ -193,29 +182,13 @@ fs_visitor::opt_peephole_sel() if (src0.file == IMM) { src0 = fs_reg(this, glsl_type::float_type); src0.type = then_mov[i]->src[0].type; - mov_imm_inst[i] = MOV(src0, then_mov[i]->src[0]); + ibld.MOV(src0, then_mov[i]->src[0]); } - sel_inst[i] = SEL(then_mov[i]->dst, src0, else_mov[i]->src[0]); - sel_inst[i]->predicate = predicate; - sel_inst[i]->predicate_inverse = predicate_inverse; + brw::exec_predicate_inv( + predicate, predicate_inverse, + ibld.SEL(then_mov[i]->dst, src0, else_mov[i]->src[0])); } - } - - if (movs == 0) - continue; - - /* Emit a CMP if our IF used the embedded comparison */ - if (brw->gen == 6 && if_inst->conditional_mod) { - fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod); - if_inst->insert_before(block, cmp_inst); - } - - for (int i = 0; i < movs; i++) { - if (mov_imm_inst[i]) - if_inst->insert_before(block, mov_imm_inst[i]); - if_inst->insert_before(block, sel_inst[i]); then_mov[i]->remove(then_block); else_mov[i]->remove(else_block); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 034a4830a9b..a898ebbe636 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -37,7 +37,6 @@ extern "C" { #include "program/prog_print.h" #include "program/prog_optimize.h" #include "util/register_allocate.h" -#include "program/sampler.h" #include "program/hash_table.h" #include "brw_context.h" #include "brw_eu.h" @@ -67,8 +66,7 @@ fs_visitor::visit(ir_variable *ir) reg = emit_general_interpolation(ir); } assert(reg); - 
hash_table_insert(this->variable_ht, reg, ir); - return; + } else if (ir->data.mode == ir_var_shader_out) { reg = new(this->mem_ctx) fs_reg(this, ir->type); @@ -105,35 +103,6 @@ fs_visitor::visit(ir_variable *ir) this->output_components[output] = vector_elements; } } - } else if (ir->data.mode == ir_var_uniform) { - int param_index = uniforms; - - /* Thanks to the lower_ubo_reference pass, we will see only - * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO - * variables, so no need for them to be in variable_ht. - * - * Some uniforms, such as samplers and atomic counters, have no actual - * storage, so we should ignore them. - */ - if (ir->is_in_uniform_block() || type_size(ir->type) == 0) - return; - - if (dispatch_width == 16) { - if (!variable_storage(ir)) { - fail("Failed to find uniform '%s' in SIMD16\n", ir->name); - } - return; - } - - param_size[param_index] = type_size(ir->type); - if (!strncmp(ir->name, "gl_", 3)) { - setup_builtin_uniform_values(ir); - } else { - setup_uniform_values(ir); - } - - reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); - reg->type = brw_type_for_base_type(ir->type); } else if (ir->data.mode == ir_var_system_value) { if (ir->data.location == SYSTEM_VALUE_SAMPLE_POS) { @@ -146,199 +115,13 @@ fs_visitor::visit(ir_variable *ir) fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0), BRW_REGISTER_TYPE_D)); } - } - - if (!reg) - reg = new(this->mem_ctx) fs_reg(this, ir->type); - - hash_table_insert(this->variable_ht, reg, ir); -} - -void -fs_visitor::visit(ir_dereference_variable *ir) -{ - fs_reg *reg = variable_storage(ir->var); - - if (!reg) { - fail("Failed to find variable storage for %s\n", ir->var->name); - this->result = fs_reg(reg_null_d); - return; - } - this->result = *reg; -} - -void -fs_visitor::visit(ir_dereference_record *ir) -{ - const glsl_type *struct_type = ir->record->type; - - ir->record->accept(this); - - unsigned int off = 0; - for (unsigned int i = 0; i < struct_type->length; i++) { - if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) - break; - off += type_size(struct_type->fields.structure[i].type); - } - this->result = offset(this->result, off); - this->result.type = brw_type_for_base_type(ir->type); -} - -void -fs_visitor::visit(ir_dereference_array *ir) -{ - ir_constant *constant_index; - fs_reg src; - int element_size = type_size(ir->type); - - constant_index = ir->array_index->as_constant(); - - ir->array->accept(this); - src = this->result; - src.type = brw_type_for_base_type(ir->type); - - if (constant_index) { - assert(src.file == UNIFORM || src.file == GRF || src.file == HW_REG); - src = offset(src, constant_index->value.i[0] * element_size); - } else { - /* Variable index array dereference. We attach the variable index - * component to the reg as a pointer to a register containing the - * offset. Currently only uniform arrays are supported in this patch, - * and that reladdr pointer is resolved by - * move_uniform_array_access_to_pull_constants(). All other array types - * are lowered by lower_variable_index_to_cond_assign(). 
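The removed ir_dereference_array comment above describes variable indexing: the index expression is scaled by the element size and chained onto any enclosing indirect offset through the reladdr pointer, so nested accesses accumulate. A toy model of that accumulation, with plain ints standing in for the offset registers:

// Sketch of the variable-index addressing the removed hunk describes.
// Each array level multiplies its index by the element size and adds any
// outer indirect offset, as the MUL/ADD pair in the hunk does.
#include <cstdio>

struct indirect { bool valid; int value; };

static indirect add_array_index(indirect outer, int array_index,
                                int element_size)
{
   int index_reg = array_index * element_size;  // MUL(index, i, size)
   if (outer.valid)
      index_reg += outer.value;                 // ADD(index, outer, index)
   return indirect{ true, index_reg };
}

int main()
{
   indirect none{ false, 0 };
   indirect inner = add_array_index(none, 3, 4);    // a[3], vec4 elements
   indirect nested = add_array_index(inner, 2, 1);  // .b[2], floats
   printf("offset = %d\n", nested.value);           // prints 14
}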
- */ - ir->array_index->accept(this); - - fs_reg index_reg; - index_reg = fs_reg(this, glsl_type::int_type); - emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size)); - - if (src.reladdr) { - emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg); - } - - src.reladdr = ralloc(mem_ctx, fs_reg); - memcpy(src.reladdr, &index_reg, sizeof(index_reg)); - } - this->result = src; -} -void -fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y, - const fs_reg &a) -{ - if (brw->gen < 6 || - !x.is_valid_3src() || - !y.is_valid_3src() || - !a.is_valid_3src()) { - /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */ - fs_reg y_times_a = fs_reg(this, glsl_type::float_type); - fs_reg one_minus_a = fs_reg(this, glsl_type::float_type); - fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type); - - emit(MUL(y_times_a, y, a)); - - fs_reg negative_a = a; - negative_a.negate = !a.negate; - emit(ADD(one_minus_a, negative_a, fs_reg(1.0f))); - emit(MUL(x_times_one_minus_a, x, one_minus_a)); - - emit(ADD(dst, x_times_one_minus_a, y_times_a)); } else { - /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so - * we need to reorder the operands. - */ - emit(LRP(dst, a, y, x)); - } -} - -void -fs_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst, - const fs_reg &src0, const fs_reg &src1) -{ - fs_inst *inst; - - if (brw->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->conditional_mod = conditionalmod; - } else { - emit(CMP(reg_null_d, src0, src1, conditionalmod)); - - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->predicate = BRW_PREDICATE_NORMAL; - } -} - -bool -fs_visitor::try_emit_saturate(ir_expression *ir) -{ - if (ir->operation != ir_unop_saturate) - return false; - - ir_rvalue *sat_val = ir->operands[0]; - - fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail(); - - sat_val->accept(this); - fs_reg src = this->result; - - fs_inst *last_inst = (fs_inst *) this->instructions.get_tail(); - - /* If the last instruction from our accept() generated our - * src, just set the saturate flag instead of emmitting a separate mov. - */ - fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); - if (modify && modify->regs_written == modify->dst.width / 8 && - modify->can_do_saturate()) { - modify->saturate = true; - this->result = src; - return true; - } - - return false; -} - -bool -fs_visitor::try_emit_mad(ir_expression *ir) -{ - /* 3-src instructions were introduced in gen6. */ - if (brw->gen < 6) - return false; - - /* MAD can only handle floating-point data. 
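The removed emit_lrp() fallback computes x*(1-a) + y*a when the LRP instruction is unavailable, and otherwise reorders operands because the hardware LRP evaluates op1*op0 + op2*(1-op0). A standalone check that the two forms agree:

// Standalone check of the removed emit_lrp() math.
#include <cassert>
#include <cmath>

static float lrp_fallback(float x, float y, float a)
{
   float y_times_a = y * a;            // MUL
   float one_minus_a = -a + 1.0f;      // ADD with negated source
   float x_term = x * one_minus_a;     // MUL
   return x_term + y_times_a;          // ADD
}

// Hardware-style LRP(op0, op1, op2) = op1*op0 + op2*(1 - op0), which is
// why the removed code emitted LRP(dst, a, y, x) for GLSL's mix(x, y, a).
static float lrp_hw(float op0, float op1, float op2)
{
   return op1 * op0 + op2 * (1.0f - op0);
}

int main()
{
   float x = 2.0f, y = 10.0f, a = 0.25f;
   assert(std::fabs(lrp_fallback(x, y, a) - lrp_hw(a, y, x)) < 1e-6f);
}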
*/ - if (ir->type != glsl_type::float_type) - return false; - - ir_rvalue *nonmul = ir->operands[1]; - ir_expression *mul = ir->operands[0]->as_expression(); - - if (!mul || mul->operation != ir_binop_mul) { - nonmul = ir->operands[0]; - mul = ir->operands[1]->as_expression(); - - if (!mul || mul->operation != ir_binop_mul) - return false; + backend_visitor::visit(ir); + return; } - if (nonmul->as_constant() || - mul->operands[0]->as_constant() || - mul->operands[1]->as_constant()) - return false; - - nonmul->accept(this); - fs_reg src0 = this->result; - - mul->operands[0]->accept(this); - fs_reg src1 = this->result; - - mul->operands[1]->accept(this); - fs_reg src2 = this->result; - - this->result = fs_reg(this, ir->type); - emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); - - return true; + hash_table_insert(this->variable_ht, reg, ir); } static int @@ -391,7 +174,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) switch (ir->operation) { case ir_unop_interpolate_at_centroid: - inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u)); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u)); break; case ir_binop_interpolate_at_sample: { @@ -399,7 +182,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) assert(sample_num || !"nonconstant sample number should have been lowered."); unsigned msg_data = sample_num->value.i[0] << 4; - inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data)); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data)); break; } @@ -408,7 +191,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) if (const_offset) { unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) | (pack_pixel_offset(const_offset->value.f[1]) << 4); - inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src, + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src, fs_reg(msg_data)); } else { /* pack the operands: hw wants offsets as 4 bit signed ints */ @@ -417,8 +200,8 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) fs_reg src2 = src; for (int i = 0; i < 2; i++) { fs_reg temp = fs_reg(this, glsl_type::float_type); - emit(MUL(temp, this->result, fs_reg(16.0f))); - emit(MOV(src2, temp)); /* float to int */ + bld.MUL(temp, this->result, fs_reg(16.0f)); + bld.MOV(src2, temp); /* float to int */ /* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires * that we support a maximum offset of +0.5, which isn't representable @@ -433,7 +216,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) * FRAGMENT_INTERPOLATION_OFFSET_BITS" */ - fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7)); + fs_inst *inst = bld.emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7)); inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */ src2 = offset(src2, 1); @@ -441,7 +224,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) } mlen = 2 * reg_width; - inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src, + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src, fs_reg(0u)); } break; @@ -463,714 +246,17 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) for (int i = 0; i < ir->type->vector_elements; i++) { int ch = swiz ? 
((*(int *)&swiz->mask) >> 2*i) & 3 : i; - emit(FS_OPCODE_LINTERP, res, + bld.emit(FS_OPCODE_LINTERP, res, dst_x, dst_y, fs_reg(interp_reg(var->data.location, ch))); res = offset(res, 1); } } -void -fs_visitor::visit(ir_expression *ir) -{ - unsigned int operand; - fs_reg op[3], temp; - fs_inst *inst; - - assert(ir->get_num_operands() <= 3); - - if (try_emit_saturate(ir)) - return; - - /* Deal with the real oddball stuff first */ - switch (ir->operation) { - case ir_binop_add: - if (try_emit_mad(ir)) - return; - break; - - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - emit_interpolate_expression(ir); - return; - - default: - break; - } - - for (operand = 0; operand < ir->get_num_operands(); operand++) { - ir->operands[operand]->accept(this); - if (this->result.file == BAD_FILE) { - fail("Failed to get tree for expression operand:\n"); - ir->operands[operand]->fprint(stderr); - fprintf(stderr, "\n"); - } - assert(this->result.is_valid_3src()); - op[operand] = this->result; - - /* Matrix expression operands should have been broken down to vector - * operations already. - */ - assert(!ir->operands[operand]->type->is_matrix()); - /* And then those vector operands should have been broken down to scalar. - */ - assert(!ir->operands[operand]->type->is_vector()); - } - - /* Storage for our result. If our result goes into an assignment, it will - * just get copy-propagated out, so no worries. - */ - this->result = fs_reg(this, ir->type); - - switch (ir->operation) { - case ir_unop_logic_not: - if (ctx->Const.UniformBooleanTrue != 1) { - emit(NOT(this->result, op[0])); - } else { - emit(XOR(this->result, op[0], fs_reg(1))); - } - break; - case ir_unop_neg: - op[0].negate = !op[0].negate; - emit(MOV(this->result, op[0])); - break; - case ir_unop_abs: - op[0].abs = true; - op[0].negate = false; - emit(MOV(this->result, op[0])); - break; - case ir_unop_sign: - if (ir->type->is_float()) { - /* AND(val, 0x80000000) gives the sign bit. - * - * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not - * zero. - */ - emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); - - op[0].type = BRW_REGISTER_TYPE_UD; - this->result.type = BRW_REGISTER_TYPE_UD; - emit(AND(this->result, op[0], fs_reg(0x80000000u))); - - inst = emit(OR(this->result, this->result, fs_reg(0x3f800000u))); - inst->predicate = BRW_PREDICATE_NORMAL; - - this->result.type = BRW_REGISTER_TYPE_F; - } else { - /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1). - * -> non-negative val generates 0x00000000. - * Predicated OR sets 1 if val is positive. 
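The float ir_unop_sign path above is pure bit manipulation: AND with 0x80000000 isolates the sign bit, and a predicated OR with 0x3f800000 merges in the bits of 1.0f for nonzero inputs. A standalone restatement with explicit bitcasts:

// Standalone check of the removed ir_unop_sign float path.
#include <cassert>
#include <cstdint>
#include <cstring>

static float sign_via_bits(float val)
{
   uint32_t bits;
   std::memcpy(&bits, &val, sizeof bits);       // bitcast f -> ud
   uint32_t result = bits & 0x80000000u;        // AND: sign bit only
   if (val != 0.0f)                             // CMP ... NZ + predicate
      result |= 0x3f800000u;                    // OR in the bits of 1.0f
   float out;
   std::memcpy(&out, &result, sizeof out);      // bitcast ud -> f
   return out;
}

int main()
{
   assert(sign_via_bits(3.5f) == 1.0f);
   assert(sign_via_bits(-0.25f) == -1.0f);
   assert(sign_via_bits(0.0f) == 0.0f);
}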
- */ - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G)); - - emit(ASR(this->result, op[0], fs_reg(31))); - - inst = emit(OR(this->result, this->result, fs_reg(1))); - inst->predicate = BRW_PREDICATE_NORMAL; - } - break; - case ir_unop_rcp: - emit_math(SHADER_OPCODE_RCP, this->result, op[0]); - break; - - case ir_unop_exp2: - emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); - break; - case ir_unop_log2: - emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); - break; - case ir_unop_exp: - case ir_unop_log: - unreachable("not reached: should be handled by ir_explog_to_explog2"); - case ir_unop_sin: - case ir_unop_sin_reduced: - emit_math(SHADER_OPCODE_SIN, this->result, op[0]); - break; - case ir_unop_cos: - case ir_unop_cos_reduced: - emit_math(SHADER_OPCODE_COS, this->result, op[0]); - break; - - case ir_unop_dFdx: - emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_BY_HINT)); - break; - case ir_unop_dFdx_coarse: - emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_COARSE)); - break; - case ir_unop_dFdx_fine: - emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_FINE)); - break; - case ir_unop_dFdy: - emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_BY_HINT)); - break; - case ir_unop_dFdy_coarse: - emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_COARSE)); - break; - case ir_unop_dFdy_fine: - emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_FINE)); - break; - - case ir_binop_add: - emit(ADD(this->result, op[0], op[1])); - break; - case ir_binop_sub: - unreachable("not reached: should be handled by ir_sub_to_add_neg"); - - case ir_binop_mul: - if (brw->gen < 8 && ir->type->is_integer()) { - /* For integer multiplication, the MUL uses the low 16 bits - * of one of the operands (src0 on gen6, src1 on gen7). The - * MACH accumulates in the contribution of the upper 16 bits - * of that operand. - */ - if (ir->operands[0]->is_uint16_constant()) { - if (brw->gen < 7) - emit(MUL(this->result, op[0], op[1])); - else - emit(MUL(this->result, op[1], op[0])); - } else if (ir->operands[1]->is_uint16_constant()) { - if (brw->gen < 7) - emit(MUL(this->result, op[1], op[0])); - else - emit(MUL(this->result, op[0], op[1])); - } else { - if (brw->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - this->result.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(reg_null_d, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); - } - } else { - emit(MUL(this->result, op[0], op[1])); - } - break; - case ir_binop_imul_high: { - if (brw->gen == 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - this->result.type); - - fs_inst *mul = emit(MUL(acc, op[0], op[1])); - emit(MACH(this->result, op[0], op[1])); - - /* Until Gen8, integer multiplies read 32-bits from one source, and - * 16-bits from the other, and relying on the MACH instruction to - * generate the high bits of the result. - * - * On Gen8, the multiply instruction does a full 32x32-bit multiply, - * but in order to do a 64x64-bit multiply we have to simulate the - * previous behavior and then use a MACH instruction. - * - * FINISHME: Don't use source modifiers on src1. 
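The integer-multiply comment above says the pre-Gen8 MUL reads only 16 bits from one source, with the MACH/accumulator sequence supplying the upper half's contribution. A standalone check of that decomposition, which also shows why a uint16 constant operand lets a lone MUL suffice:

// Standalone check of the low-32-bit product decomposition implied by the
// note above: a*b == a*lo16(b) + (a*hi16(b) << 16), all modulo 2^32.
// When b fits in 16 bits, the second term vanishes.
#include <cassert>
#include <cstdint>

static uint32_t mul_low32(uint32_t a, uint32_t b)
{
   uint32_t lo = a * (b & 0xffffu);             // what the 16-bit MUL sees
   uint32_t hi = a * (b >> 16);                 // upper-half contribution
   return lo + (hi << 16);                      // folded back in
}

int main()
{
   assert(mul_low32(0x12345678u, 0x9abcdef0u) ==
          0x12345678u * 0x9abcdef0u);
   assert(mul_low32(0xdeadbeefu, 0xffffu) ==    // 16-bit operand: the
          0xdeadbeefu * 0xffffu);               // single MUL suffices
}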
- */ - if (brw->gen >= 8) { - assert(mul->src[1].type == BRW_REGISTER_TYPE_D || - mul->src[1].type == BRW_REGISTER_TYPE_UD); - if (mul->src[1].type == BRW_REGISTER_TYPE_D) { - mul->src[1].type = BRW_REGISTER_TYPE_W; - } else { - mul->src[1].type = BRW_REGISTER_TYPE_UW; - } - } - - break; - } - case ir_binop_div: - /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ - assert(ir->type->is_integer()); - emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); - break; - case ir_binop_carry: { - if (brw->gen == 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - BRW_REGISTER_TYPE_UD); - - emit(ADDC(reg_null_ud, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); - break; - } - case ir_binop_borrow: { - if (brw->gen == 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - BRW_REGISTER_TYPE_UD); - - emit(SUBB(reg_null_ud, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); - break; - } - case ir_binop_mod: - /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ - assert(ir->type->is_integer()); - emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); - break; - - case ir_binop_less: - case ir_binop_greater: - case ir_binop_lequal: - case ir_binop_gequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - if (ctx->Const.UniformBooleanTrue == 1) { - resolve_bool_comparison(ir->operands[0], &op[0]); - resolve_bool_comparison(ir->operands[1], &op[1]); - } - - emit(CMP(this->result, op[0], op[1], - brw_conditional_for_comparison(ir->operation))); - break; - - case ir_binop_logic_xor: - emit(XOR(this->result, op[0], op[1])); - break; - - case ir_binop_logic_or: - emit(OR(this->result, op[0], op[1])); - break; - - case ir_binop_logic_and: - emit(AND(this->result, op[0], op[1])); - break; - - case ir_binop_dot: - case ir_unop_any: - unreachable("not reached: should be handled by brw_fs_channel_expressions"); - - case ir_unop_noise: - unreachable("not reached: should be handled by lower_noise"); - - case ir_quadop_vector: - unreachable("not reached: should be handled by lower_quadop_vector"); - - case ir_binop_vector_extract: - unreachable("not reached: should be handled by lower_vec_index_to_cond_assign()"); - - case ir_triop_vector_insert: - unreachable("not reached: should be handled by lower_vector_insert()"); - - case ir_binop_ldexp: - unreachable("not reached: should be handled by ldexp_to_arith()"); - - case ir_unop_sqrt: - emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); - break; - - case ir_unop_rsq: - emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); - break; - - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_u2f: - op[0].type = BRW_REGISTER_TYPE_F; - this->result = op[0]; - break; - case ir_unop_i2u: - case ir_unop_bitcast_f2u: - op[0].type = BRW_REGISTER_TYPE_UD; - this->result = op[0]; - break; - case ir_unop_u2i: - case ir_unop_bitcast_f2i: - op[0].type = BRW_REGISTER_TYPE_D; - this->result = op[0]; - break; - case ir_unop_i2f: - case ir_unop_u2f: - case ir_unop_f2i: - case ir_unop_f2u: - emit(MOV(this->result, op[0])); - break; - - case ir_unop_b2i: - emit(AND(this->result, op[0], fs_reg(1))); - break; - case ir_unop_b2f: - if (ctx->Const.UniformBooleanTrue != 1) { - op[0].type = BRW_REGISTER_TYPE_UD; - this->result.type = BRW_REGISTER_TYPE_UD; - emit(AND(this->result, op[0], fs_reg(0x3f800000u))); - 
this->result.type = BRW_REGISTER_TYPE_F; - } else { - temp = fs_reg(this, glsl_type::int_type); - emit(AND(temp, op[0], fs_reg(1))); - emit(MOV(this->result, temp)); - } - break; - - case ir_unop_f2b: - emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); - break; - case ir_unop_i2b: - emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - break; - - case ir_unop_trunc: - emit(RNDZ(this->result, op[0])); - break; - case ir_unop_ceil: - op[0].negate = !op[0].negate; - emit(RNDD(this->result, op[0])); - this->result.negate = true; - break; - case ir_unop_floor: - emit(RNDD(this->result, op[0])); - break; - case ir_unop_fract: - emit(FRC(this->result, op[0])); - break; - case ir_unop_round_even: - emit(RNDE(this->result, op[0])); - break; - - case ir_binop_min: - case ir_binop_max: - resolve_ud_negate(&op[0]); - resolve_ud_negate(&op[1]); - emit_minmax(ir->operation == ir_binop_min ? - BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE, - this->result, op[0], op[1]); - break; - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_snorm_4x8: - case ir_unop_pack_unorm_2x16: - case ir_unop_pack_unorm_4x8: - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_snorm_4x8: - case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_unorm_4x8: - case ir_unop_unpack_half_2x16: - case ir_unop_pack_half_2x16: - unreachable("not reached: should be handled by lower_packing_builtins"); - case ir_unop_unpack_half_2x16_split_x: - emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]); - break; - case ir_unop_unpack_half_2x16_split_y: - emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]); - break; - case ir_binop_pow: - emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); - break; - - case ir_unop_bitfield_reverse: - emit(BFREV(this->result, op[0])); - break; - case ir_unop_bit_count: - emit(CBIT(this->result, op[0])); - break; - case ir_unop_find_msb: - temp = fs_reg(this, glsl_type::uint_type); - emit(FBH(temp, op[0])); - - /* FBH counts from the MSB side, while GLSL's findMSB() wants the count - * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then - * subtract the result from 31 to convert the MSB count into an LSB count. - */ - - /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */ - emit(MOV(this->result, temp)); - emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ)); - - temp.negate = true; - inst = emit(ADD(this->result, temp, fs_reg(31))); - inst->predicate = BRW_PREDICATE_NORMAL; - break; - case ir_unop_find_lsb: - emit(FBL(this->result, op[0])); - break; - case ir_unop_saturate: - inst = emit(MOV(this->result, op[0])); - inst->saturate = true; - break; - case ir_triop_bitfield_extract: - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. 
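The removed find_msb sequence converts FBH's MSB-side count into the LSB-relative index GLSL expects by subtracting from 31, passing the 0xFFFFFFFF error value through unchanged. A sketch using __builtin_clz (GCC/Clang) as a stand-in for FBH:

// Standalone model of the removed find_msb fixup.
#include <cassert>
#include <cstdint>

static int find_msb(uint32_t v)
{
   if (v == 0)
      return -1;                  // FBH returns 0xFFFFFFFF; passed through
   int fbh = __builtin_clz(v);    // leading-position count, like FBH
   return 31 - fbh;               // convert MSB-side count to LSB index
}

int main()
{
   assert(find_msb(1u) == 0);
   assert(find_msb(0x80000000u) == 31);
   assert(find_msb(0x00000500u) == 10);
   assert(find_msb(0u) == -1);
}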
- */ - emit(BFE(this->result, op[2], op[1], op[0])); - break; - case ir_binop_bfm: - emit(BFI1(this->result, op[0], op[1])); - break; - case ir_triop_bfi: - emit(BFI2(this->result, op[0], op[1], op[2])); - break; - case ir_quadop_bitfield_insert: - unreachable("not reached: should be handled by " - "lower_instructions::bitfield_insert_to_bfm_bfi"); - - case ir_unop_bit_not: - emit(NOT(this->result, op[0])); - break; - case ir_binop_bit_and: - emit(AND(this->result, op[0], op[1])); - break; - case ir_binop_bit_xor: - emit(XOR(this->result, op[0], op[1])); - break; - case ir_binop_bit_or: - emit(OR(this->result, op[0], op[1])); - break; - - case ir_binop_lshift: - emit(SHL(this->result, op[0], op[1])); - break; - - case ir_binop_rshift: - if (ir->type->base_type == GLSL_TYPE_INT) - emit(ASR(this->result, op[0], op[1])); - else - emit(SHR(this->result, op[0], op[1])); - break; - case ir_binop_pack_half_2x16_split: - emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]); - break; - case ir_binop_ubo_load: { - /* This IR node takes a constant uniform block and a constant or - * variable byte offset within the block and loads a vector from that. - */ - ir_constant *const_uniform_block = ir->operands[0]->as_constant(); - ir_constant *const_offset = ir->operands[1]->as_constant(); - fs_reg surf_index; - - if (const_uniform_block) { - /* The block index is a constant, so just emit the binding table entry - * as an immediate. - */ - surf_index = fs_reg(stage_prog_data->binding_table.ubo_start + - const_uniform_block->value.u[0]); - } else { - /* The block index is not a constant. Evaluate the index expression - * per-channel and add the base UBO index; the generator will select - * a value from any live channel. - */ - surf_index = fs_reg(this, glsl_type::uint_type); - emit(ADD(surf_index, op[0], - fs_reg(stage_prog_data->binding_table.ubo_start))) - ->force_writemask_all = true; - - /* Assume this may touch any UBO. It would be nice to provide - * a tighter bound, but the array information is already lowered away. - */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + - shader_prog->NumUniformBlocks - 1); - } - - if (const_offset) { - fs_reg packed_consts = fs_reg(this, glsl_type::float_type); - packed_consts.type = result.type; - - fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15); - emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8, - packed_consts, surf_index, const_offset_reg)); - - for (int i = 0; i < ir->type->vector_elements; i++) { - packed_consts.set_smear(const_offset->value.u[0] % 16 / 4 + i); - - /* The std140 packing rules don't allow vectors to cross 16-byte - * boundaries, and a reg is 32 bytes. - */ - assert(packed_consts.subreg_offset < 32); - - /* UBO bools are any nonzero value. We consider bools to be - * values with the low bit set to 1. Convert them using CMP. - */ - if (ir->type->base_type == GLSL_TYPE_BOOL) { - emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ)); - } else { - emit(MOV(result, packed_consts)); - } - - result = offset(result, 1); - } - } else { - /* Turn the byte offset into a dword offset. 
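The constant-offset ir_binop_ubo_load path above fetches an aligned 16-byte block (offset & ~15) and then smears each component out at (offset % 16)/4 + i, relying on std140 never letting a vector straddle two blocks. A standalone model of that indexing:

// Standalone model of the constant-offset UBO component selection.
#include <cassert>
#include <cstdint>

static uint32_t ubo_component(const uint32_t *block_start, // aligned load
                              unsigned const_offset, unsigned i)
{
   return block_start[const_offset % 16 / 4 + i];   // the "smear"
}

int main()
{
   // One 16-byte block, as the pull-constant message would return it.
   uint32_t block[4] = { 10, 11, 12, 13 };
   unsigned const_offset = 0x48;                    // vec2 at byte 72
   assert((const_offset & ~15u) == 0x40);           // block fetched at 64
   assert(ubo_component(block, const_offset, 0) == 12);  // byte 72 -> .z
   assert(ubo_component(block, const_offset, 1) == 13);  // byte 76 -> .w
}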
*/ - fs_reg base_offset = fs_reg(this, glsl_type::int_type); - emit(SHR(base_offset, op[1], fs_reg(2))); - - for (int i = 0; i < ir->type->vector_elements; i++) { - emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index, - base_offset, i)); - - if (ir->type->base_type == GLSL_TYPE_BOOL) - emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ)); - - result = offset(result, 1); - } - } - - result.reg_offset = 0; - break; - } - - case ir_triop_fma: - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. - */ - emit(MAD(this->result, op[2], op[1], op[0])); - break; - - case ir_triop_lrp: - emit_lrp(this->result, op[0], op[1], op[2]); - break; - - case ir_triop_csel: - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - inst = emit(BRW_OPCODE_SEL, this->result, op[1], op[2]); - inst->predicate = BRW_PREDICATE_NORMAL; - break; - - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - unreachable("already handled above"); - break; - } -} - -void -fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, - const glsl_type *type, bool predicated) -{ - switch (type->base_type) { - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: - for (unsigned int i = 0; i < type->components(); i++) { - l.type = brw_type_for_base_type(type); - r.type = brw_type_for_base_type(type); - - if (predicated || !l.equals(r)) { - fs_inst *inst = emit(MOV(l, r)); - inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE; - } - - l = offset(l, 1); - r = offset(r, 1); - } - break; - case GLSL_TYPE_ARRAY: - for (unsigned int i = 0; i < type->length; i++) { - emit_assignment_writes(l, r, type->fields.array, predicated); - } - break; - - case GLSL_TYPE_STRUCT: - for (unsigned int i = 0; i < type->length; i++) { - emit_assignment_writes(l, r, type->fields.structure[i].type, - predicated); - } - break; - - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_ATOMIC_UINT: - break; - - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: - unreachable("not reached"); - } -} - -/* If the RHS processing resulted in an instruction generating a - * temporary value, and it would be easy to rewrite the instruction to - * generate its result right into the LHS instead, do so. This ends - * up reliably removing instructions where it can be tricky to do so - * later without real UD chain information. - */ -bool -fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, - fs_reg dst, - fs_reg src, - fs_inst *pre_rhs_inst, - fs_inst *last_rhs_inst) -{ - /* Only attempt if we're doing a direct assignment. */ - if (ir->condition || - !(ir->lhs->type->is_scalar() || - (ir->lhs->type->is_vector() && - ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) - return false; - - /* Make sure the last instruction generated our source reg. */ - fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, - last_rhs_inst, - src); - if (!modify) - return false; - - /* If last_rhs_inst wrote a different number of components than our LHS, - * we can't safely rewrite it. - */ - if (alloc.sizes[dst.reg] != modify->regs_written) - return false; - - /* Success! Rewrite the instruction. 
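The removed try_rewrite_rhs_to_dst() only retargets the RHS-producing instruction at the LHS for an unconditional full write whose producer wrote exactly the destination's size. A toy restatement of those guards, with simplified stand-in fields for the visitor's state:

// Toy restatement of the removed try_rewrite_rhs_to_dst() tests.
#include <cassert>

struct toy_inst { int dst_reg; int regs_written; };

static bool can_rewrite(bool has_condition, unsigned write_mask,
                        unsigned num_components, int lhs_size,
                        const toy_inst *producer)
{
   if (has_condition)
      return false;                    // predicated writes keep the MOV
   if (write_mask != (1u << num_components) - 1)
      return false;                    // partial writemask: keep the copy
   if (!producer)
      return false;                    // RHS didn't come from an inst
   return lhs_size == producer->regs_written;  // sizes must match exactly
}

int main()
{
   toy_inst mul = { 7, 1 };
   assert(can_rewrite(false, 0xf, 4, 1, &mul));
   assert(!can_rewrite(false, 0x7, 4, 1, &mul));   // partial write
   assert(!can_rewrite(true, 0xf, 4, 1, &mul));    // conditional
}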
*/ - modify->dst = dst; - - return true; -} - -void -fs_visitor::visit(ir_assignment *ir) -{ - fs_reg l, r; - fs_inst *inst; - - /* FINISHME: arrays on the lhs */ - ir->lhs->accept(this); - l = this->result; - - fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail(); - - ir->rhs->accept(this); - r = this->result; - - fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail(); - - assert(l.file != BAD_FILE); - assert(r.file != BAD_FILE); - - if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) - return; - - if (ir->condition) { - emit_bool_to_cond_code(ir->condition); - } - - if (ir->lhs->type->is_scalar() || - ir->lhs->type->is_vector()) { - for (int i = 0; i < ir->lhs->type->vector_elements; i++) { - if (ir->write_mask & (1 << i)) { - inst = emit(MOV(l, r)); - if (ir->condition) - inst->predicate = BRW_PREDICATE_NORMAL; - r = offset(r, 1); - } - l = offset(l, 1); - } - } else { - emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); - } -} - fs_inst * fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, - fs_reg shadow_c, fs_reg lod, fs_reg dPdy, - uint32_t sampler) + const fs_reg &shadow_c, fs_reg lod, fs_reg lod2, + const fs_reg &sampler) { int mlen; int base_mrf = 1; @@ -1182,7 +268,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, if (shadow_c.file != BAD_FILE) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate = offset(coordinate, 1); } @@ -1190,7 +276,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * the unused slots must be zeroed. */ for (int i = ir->coordinate->type->vector_elements; i < 3; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)); } mlen += 3; @@ -1198,25 +284,25 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* There's no plain shadow compare message, so we use shadow * compare with a bias of 0.0. */ - emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); mlen++; } else if (ir->op == ir_txb || ir->op == ir_txl) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), lod); mlen++; } else { unreachable("Should not get here."); } - emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), shadow_c); mlen++; } else if (ir->op == ir_tex) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate = offset(coordinate, 1); } /* zero the others. */ for (int i = ir->coordinate->type->vector_elements; i<3; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)); } /* gen4's SIMD8 sampler always has the slots for u,v,r present. 
*/ mlen += 3; @@ -1224,7 +310,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, fs_reg &dPdx = lod; for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate = offset(coordinate, 1); } /* the slots for u and v are always present, but r is optional */ @@ -1245,20 +331,20 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * m5 m6 m7 m8 m9 m10 */ for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdx); dPdx = offset(dPdx, 1); } mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy)); - dPdy = offset(dPdy, 1); + bld.MOV(fs_reg(MRF, base_mrf + mlen), lod2); + lod2 = offset(lod2, 1); } mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); } else if (ir->op == ir_txs) { /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ simd16 = true; - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod)); + bld.MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); mlen += 2; } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod @@ -1268,8 +354,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf); for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), - coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), + coordinate); coordinate = offset(coordinate, 1); } @@ -1277,13 +363,13 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * be necessary for TXF (ld), but seems wise to do for all messages. */ for (int i = ir->coordinate->type->vector_elements; i < 3; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)); } /* lod/bias appears after u/v/r. */ mlen += 6; - emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod)); + bld.MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod); mlen++; /* The unused upper half. */ @@ -1315,7 +401,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + fs_inst *inst = bld.emit(opcode, dst, reg_undef, sampler); inst->base_mrf = base_mrf; inst->mlen = mlen; inst->header_present = true; @@ -1323,7 +409,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, if (simd16) { for (int i = 0; i < 4; i++) { - emit(MOV(orig_dst, dst)); + bld.MOV(orig_dst, dst); orig_dst = offset(orig_dst, 1); dst = offset(dst, 2); } @@ -1341,9 +427,9 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * surprising in the disassembly. 
*/ fs_inst * -fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, - fs_reg shadow_c, fs_reg lod, fs_reg lod2, - fs_reg sample_index, uint32_t sampler) +fs_visitor::emit_texture_gen5(ir_texture *ir, const fs_reg &dst, fs_reg coordinate, + const fs_reg &shadow_c, fs_reg lod, fs_reg lod2, + const fs_reg &sample_index, const fs_reg &sampler) { int reg_width = dispatch_width / 8; bool header_present = false; @@ -1362,7 +448,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, } for (int i = 0; i < vector_elements; i++) { - emit(MOV(retype(offset(msg_coords, i), coordinate.type), coordinate)); + bld.MOV(retype(offset(msg_coords, i), coordinate.type), coordinate); coordinate = offset(coordinate, 1); } fs_reg msg_end = offset(msg_coords, vector_elements); @@ -1370,7 +456,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, if (shadow_c.file != BAD_FILE) { fs_reg msg_shadow = msg_lod; - emit(MOV(msg_shadow, shadow_c)); + bld.MOV(msg_shadow, shadow_c); msg_lod = offset(msg_shadow, 1); msg_end = msg_lod; } @@ -1381,13 +467,13 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, opcode = SHADER_OPCODE_TEX; break; case ir_txb: - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); opcode = FS_OPCODE_TXB; break; case ir_txl: - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXL; @@ -1404,11 +490,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, */ msg_end = msg_lod; for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { - emit(MOV(msg_end, lod)); + bld.MOV(msg_end, lod); lod = offset(lod, 1); msg_end = offset(msg_end, 1); - emit(MOV(msg_end, lod2)); + bld.MOV(msg_end, lod2); lod2 = offset(lod2, 1); msg_end = offset(msg_end, 1); } @@ -1418,21 +504,21 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, } case ir_txs: msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD); - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXS; break; case ir_query_levels: msg_lod = msg_end; - emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u))); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXS; break; case ir_txf: msg_lod = offset(msg_coords, 3); - emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod)); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXF; @@ -1440,9 +526,9 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txf_ms: msg_lod = offset(msg_coords, 3); /* lod */ - emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u))); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); /* sample index */ - emit(MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index)); + bld.MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index); msg_end = offset(msg_lod, 2); opcode = SHADER_OPCODE_TXF_CMS; @@ -1457,7 +543,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + fs_inst *inst = bld.emit(opcode, dst, reg_undef, sampler); inst->base_mrf = message.reg; inst->mlen = msg_end.reg - message.reg; inst->header_present = header_present; @@ -1481,14 +567,15 @@ is_high_sampler(struct brw_context 
*brw, fs_reg sampler) } fs_inst * -fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, - fs_reg shadow_c, fs_reg lod, fs_reg lod2, - fs_reg sample_index, fs_reg mcs, fs_reg sampler) +fs_visitor::emit_texture_gen7(ir_texture *ir, const fs_reg &dst, fs_reg coordinate, + const fs_reg &shadow_c, fs_reg lod, fs_reg lod2, + fs_reg offset_val, const fs_reg &sample_index, + const fs_reg &mcs, const fs_reg &sampler) { - int reg_width = dispatch_width / 8; + int reg_width = bld.dispatch_width() / 8; bool header_present = false; - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE); + for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) { sources[i] = fs_reg(this, glsl_type::float_type); } @@ -1512,7 +599,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } if (shadow_c.file != BAD_FILE) { - emit(MOV(sources[length], shadow_c)); + bld.MOV(sources[length], shadow_c); length++; } @@ -1525,11 +612,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_lod: break; case ir_txb: - emit(MOV(sources[length], lod)); + bld.MOV(sources[length], lod); length++; break; case ir_txl: - emit(MOV(sources[length], lod)); + bld.MOV(sources[length], lod); length++; break; case ir_txd: { @@ -1539,19 +626,19 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z */ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(sources[length], coordinate)); - coordinate = offset(coordinate, 1); - length++; + bld.MOV(sources[length], coordinate); + coordinate = offset(coordinate, 1); + length++; /* For cube map array, the coordinate is (u,v,r,ai) but there are * only derivatives for (u, v, r). */ if (i < ir->lod_info.grad.dPdx->type->vector_elements) { - emit(MOV(sources[length], lod)); + bld.MOV(sources[length], lod); lod = offset(lod, 1); length++; - emit(MOV(sources[length], lod2)); + bld.MOV(sources[length], lod2); lod2 = offset(lod2, 1); length++; } @@ -1561,43 +648,43 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, break; } case ir_txs: - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod); length++; break; case ir_query_levels: - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u))); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)); length++; break; case ir_txf: /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. 
*/ - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); length++; - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod); length++; for (int i = 1; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); - coordinate = offset(coordinate, 1); - length++; + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); + coordinate = offset(coordinate, 1); + length++; } coordinate_done = true; break; case ir_txf_ms: - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index); length++; /* data from the multisample control surface */ - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs); length++; /* there is no offsetting for this message; just copy in the integer * texture coordinates */ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); length++; } @@ -1610,23 +697,20 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, no16("Gen7 does not support gather4_po_c in SIMD16 mode."); /* More crazy intermixing */ - ir->offset->accept(this); - fs_reg offset_value = this->result; - for (int i = 0; i < 2; i++) { /* u, v */ - emit(MOV(sources[length], coordinate)); + bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; } for (int i = 0; i < 2; i++) { /* offu, offv */ - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value)); - offset_value = offset(offset_value, 1); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_val); + offset_val = offset(offset_val, 1); length++; } if (ir->coordinate->type->vector_elements == 3) { /* r if present */ - emit(MOV(sources[length], coordinate)); + bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; } @@ -1639,7 +723,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* Set up the coordinate (except for cases where it was done above) */ if (ir->coordinate && !coordinate_done) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(sources[length], coordinate)); + bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; } @@ -1651,9 +735,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, else mlen = length * reg_width; - fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), - BRW_REGISTER_TYPE_F); - emit(LOAD_PAYLOAD(src_payload, sources, length)); + fs_reg payload = bld.natural_reg(BRW_REGISTER_TYPE_F, mlen); + bld.LOAD_PAYLOAD(payload, sources, length); /* Generate the SEND */ enum opcode opcode; @@ -1676,7 +759,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, default: unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, src_payload, sampler); + instruction *inst = bld.emit(opcode, dst, payload, sampler); inst->base_mrf = -1; inst->mlen = mlen; inst->header_present = header_present; @@ -1690,489 +773,22 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, return inst; } 
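The rewritten emit_texture_gen7() above shows the pattern this patch applies throughout: the visitor's emit(MOV(...)) helpers become calls on a builder object, each message parameter is staged in its own virtual register, LOAD_PAYLOAD fuses the pieces into one contiguous payload, and the final send-like opcode carries an mlen scaled by the SIMD width. A minimal sketch of that pattern, assuming only the brw::fs_builder calls visible in these hunks (bld.MOV(), bld.natural_reg(), bld.LOAD_PAYLOAD(), bld.emit(), bld.dispatch_width()); the helper name emit_payload_message and its signature are hypothetical, for illustration only:

/* Hypothetical helper: stage each parameter in its own vgrf, fuse them
 * into a contiguous payload with LOAD_PAYLOAD, then emit the message
 * opcode with mlen scaled by the SIMD width, as emit_texture_gen7()
 * does above.
 */
fs_inst *
fs_visitor::emit_payload_message(enum opcode op, const fs_reg &dst,
                                 const fs_reg *params, int count)
{
   const int reg_width = bld.dispatch_width() / 8; /* 1 in SIMD8, 2 in SIMD16 */
   const int mlen = count * reg_width;             /* message length in GRFs */
   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, count);

   for (int i = 0; i < count; i++) {
      sources[i] = fs_reg(this, glsl_type::float_type);
      bld.MOV(sources[i], params[i]);
   }

   fs_reg payload = bld.natural_reg(BRW_REGISTER_TYPE_F, mlen);
   bld.LOAD_PAYLOAD(payload, sources, count);

   fs_inst *inst = bld.emit(op, dst, payload);
   inst->base_mrf = -1;   /* send from the GRF file, not the MRF */
   inst->mlen = mlen;
   return inst;
}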
-fs_reg -fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, - bool is_rect, uint32_t sampler, int texunit) -{ - fs_inst *inst = NULL; - bool needs_gl_clamp = true; - fs_reg scale_x, scale_y; - const struct brw_sampler_prog_key_data *tex = - (stage == MESA_SHADER_FRAGMENT) ? - &((brw_wm_prog_key*) this->key)->tex : NULL; - assert(tex); - - /* The 965 requires the EU to do the normalization of GL rectangle - * texture coordinates. We use the program parameter state - * tracking to get the scaling factor. - */ - if (is_rect && - (brw->gen < 6 || - (brw->gen >= 6 && (tex->gl_clamp_mask[0] & (1 << sampler) || - tex->gl_clamp_mask[1] & (1 << sampler))))) { - struct gl_program_parameter_list *params = prog->Parameters; - int tokens[STATE_LENGTH] = { - STATE_INTERNAL, - STATE_TEXRECT_SCALE, - texunit, - 0, - 0 - }; - - no16("rectangle scale uniform setup not supported on SIMD16\n"); - if (dispatch_width == 16) { - return coordinate; - } - - GLuint index = _mesa_add_state_reference(params, - (gl_state_index *)tokens); - /* Try to find existing copies of the texrect scale uniforms. */ - for (unsigned i = 0; i < uniforms; i++) { - if (stage_prog_data->param[i] == - &prog->Parameters->ParameterValues[index][0]) { - scale_x = fs_reg(UNIFORM, i); - scale_y = fs_reg(UNIFORM, i + 1); - break; - } - } - - /* If we didn't already set them up, do so now. */ - if (scale_x.file == BAD_FILE) { - scale_x = fs_reg(UNIFORM, uniforms); - scale_y = fs_reg(UNIFORM, uniforms + 1); - - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[index][0]; - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[index][1]; - } - } - - /* The 965 requires the EU to do the normalization of GL rectangle - * texture coordinates. We use the program parameter state - * tracking to get the scaling factor. - */ - if (brw->gen < 6 && is_rect) { - fs_reg dst = fs_reg(this, ir->coordinate->type); - fs_reg src = coordinate; - coordinate = dst; - - emit(MUL(dst, src, scale_x)); - dst = offset(dst, 1); - src = offset(src, 1); - emit(MUL(dst, src, scale_y)); - } else if (is_rect) { - /* On gen6+, the sampler handles the rectangle coordinates - * natively, without needing rescaling. But that means we have - * to do GL_CLAMP clamping at the [0, width], [0, height] scale, - * not [0, 1] like the default case below. - */ - needs_gl_clamp = false; - - for (int i = 0; i < 2; i++) { - if (tex->gl_clamp_mask[i] & (1 << sampler)) { - fs_reg chan = coordinate; - chan = offset(chan, i); - - inst = emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f)); - inst->conditional_mod = BRW_CONDITIONAL_G; - - /* Our parameter comes in as 1.0/width or 1.0/height, - * because that's what people normally want for doing - * texture rectangle handling. We need width or height - * for clamping, but we don't care enough to make a new - * parameter type, so just invert back. - */ - fs_reg limit = fs_reg(this, glsl_type::float_type); - emit(MOV(limit, i == 0 ? 
scale_x : scale_y)); - emit(SHADER_OPCODE_RCP, limit, limit); - - inst = emit(BRW_OPCODE_SEL, chan, chan, limit); - inst->conditional_mod = BRW_CONDITIONAL_L; - } - } - } - - if (ir->coordinate && needs_gl_clamp) { - for (unsigned int i = 0; - i < MIN2(ir->coordinate->type->vector_elements, 3); i++) { - if (tex->gl_clamp_mask[i] & (1 << sampler)) { - fs_reg chan = coordinate; - chan = offset(chan, i); - - fs_inst *inst = emit(MOV(chan, chan)); - inst->saturate = true; - } - } - } - return coordinate; -} - -/* Sample from the MCS surface attached to this multisample texture. */ -fs_reg -fs_visitor::emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, fs_reg sampler) -{ - int reg_width = dispatch_width / 8; - int length = ir->coordinate->type->vector_elements; - fs_reg payload = fs_reg(GRF, alloc.allocate(length * reg_width), - BRW_REGISTER_TYPE_F); - fs_reg dest = fs_reg(this, glsl_type::uvec4_type); - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, length); - - /* parameters are: u, v, r; missing parameters are treated as zero */ - for (int i = 0; i < length; i++) { - sources[i] = fs_reg(this, glsl_type::float_type); - emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate)); - coordinate = offset(coordinate, 1); - } - - emit(LOAD_PAYLOAD(payload, sources, length)); - - fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler); - inst->base_mrf = -1; - inst->mlen = length * reg_width; - inst->header_present = false; - inst->regs_written = 4 * reg_width; /* we only care about one reg of - * response, but the sampler always - * writes 4/8 - */ - - return dest; -} - -void -fs_visitor::visit(ir_texture *ir) -{ - const struct brw_sampler_prog_key_data *tex = - (stage == MESA_SHADER_FRAGMENT) ? - &((brw_wm_prog_key*) this->key)->tex : NULL; - assert(tex); - fs_inst *inst = NULL; - - uint32_t sampler = - _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog); - - ir_rvalue *nonconst_sampler_index = - _mesa_get_sampler_array_nonconst_index(ir->sampler); - - /* Handle non-constant sampler array indexing */ - fs_reg sampler_reg; - if (nonconst_sampler_index) { - /* The highest sampler which may be used by this operation is - * the last element of the array. Mark it here, because the generator - * doesn't have enough information to determine the bound. - */ - uint32_t array_size = ir->sampler->as_dereference_array() - ->array->type->array_size(); - - uint32_t max_used = sampler + array_size - 1; - if (ir->op == ir_tg4 && brw->gen < 8) { - max_used += stage_prog_data->binding_table.gather_texture_start; - } else { - max_used += stage_prog_data->binding_table.texture_start; - } - - brw_mark_surface_used(prog_data, max_used); - - /* Emit code to evaluate the actual indexing expression */ - nonconst_sampler_index->accept(this); - fs_reg temp(this, glsl_type::uint_type); - emit(ADD(temp, this->result, fs_reg(sampler))) - ->force_writemask_all = true; - sampler_reg = temp; - } else { - /* Single sampler, or constant array index; the indexing expression - * is just an immediate. - */ - sampler_reg = fs_reg(sampler); - } - - /* FINISHME: We're failing to recompile our programs when the sampler is - * updated. This only matters for the texture rectangle scale parameters - * (pre-gen6, or gen6+ with GL_CLAMP). - */ - int texunit = prog->SamplerUnits[sampler]; - - if (ir->op == ir_tg4) { - /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother - * emitting anything other than setting up the constant result. 
- */ - ir_constant *chan = ir->lod_info.component->as_constant(); - int swiz = GET_SWZ(tex->swizzles[sampler], chan->value.i[0]); - if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { - - fs_reg res = fs_reg(this, glsl_type::vec4_type); - this->result = res; - - for (int i=0; i<4; i++) { - emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f))); - res = offset(res, 1); - } - return; - } - } - - /* Should be lowered by do_lower_texture_projection */ - assert(!ir->projector); - - /* Should be lowered */ - assert(!ir->offset || !ir->offset->type->is_array()); - - /* Generate code to compute all the subexpression trees. This has to be - * done before loading any values into MRFs for the sampler message since - * generating these values may involve SEND messages that need the MRFs. - */ - fs_reg coordinate; - if (ir->coordinate) { - ir->coordinate->accept(this); - - coordinate = rescale_texcoord(ir, this->result, - ir->sampler->type->sampler_dimensionality == - GLSL_SAMPLER_DIM_RECT, - sampler, texunit); - } - - fs_reg shadow_comparitor; - if (ir->shadow_comparitor) { - ir->shadow_comparitor->accept(this); - shadow_comparitor = this->result; - } - - fs_reg lod, lod2, sample_index, mcs; - switch (ir->op) { - case ir_tex: - case ir_lod: - case ir_tg4: - case ir_query_levels: - break; - case ir_txb: - ir->lod_info.bias->accept(this); - lod = this->result; - break; - case ir_txd: - ir->lod_info.grad.dPdx->accept(this); - lod = this->result; - - ir->lod_info.grad.dPdy->accept(this); - lod2 = this->result; - break; - case ir_txf: - case ir_txl: - case ir_txs: - ir->lod_info.lod->accept(this); - lod = this->result; - break; - case ir_txf_ms: - ir->lod_info.sample_index->accept(this); - sample_index = this->result; - - if (brw->gen >= 7 && tex->compressed_multisample_layout_mask & (1<<sampler)) - mcs = emit_mcs_fetch(ir, coordinate, sampler_reg); - else - mcs = fs_reg(0u); - break; - default: - unreachable("Unrecognized texture opcode"); - }; - - /* Writemasking doesn't eliminate channels on SIMD8 texture - * samples, so don't worry about them. 
- */ - fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1)); - - if (brw->gen >= 7) { - inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor, - lod, lod2, sample_index, mcs, sampler_reg); - } else if (brw->gen >= 5) { - inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor, - lod, lod2, sample_index, sampler); - } else { - inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor, - lod, lod2, sampler); - } - - if (ir->offset != NULL && ir->op != ir_txf) - inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant()); - - if (ir->op == ir_tg4) - inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17 - - if (ir->shadow_comparitor) - inst->shadow_compare = true; - - /* fixup #layers for cube map arrays */ - if (ir->op == ir_txs) { - glsl_type const *type = ir->sampler->type; - if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && - type->sampler_array) { - fs_reg depth = offset(dst, 2); - fs_reg fixed_depth = fs_reg(this, glsl_type::int_type); - emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6)); - - fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written); - int components = inst->regs_written / (dst.width / 8); - for (int i = 0; i < components; i++) { - if (i == 2) { - fixed_payload[i] = fixed_depth; - } else { - fixed_payload[i] = offset(dst, i); - } - } - emit(LOAD_PAYLOAD(dst, fixed_payload, components)); - } - } - - if (brw->gen == 6 && ir->op == ir_tg4) { - emit_gen6_gather_wa(tex->gen6_gather_wa[sampler], dst); - } - - swizzle_result(ir, dst, sampler); -} - -/** - * Apply workarounds for Gen6 gather with UINT/SINT - */ -void -fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) -{ - if (!wa) - return; - - int width = (wa & WA_8BIT) ? 8 : 16; - - for (int i = 0; i < 4; i++) { - fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F); - /* Convert from UNORM to UINT */ - emit(MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1)))); - emit(MOV(dst, dst_f)); - - if (wa & WA_SIGN) { - /* Reinterpret the UINT value as a signed INT value by - * shifting the sign bit into place, then shifting back - * preserving sign. - */ - emit(SHL(dst, dst, fs_reg(32 - width))); - emit(ASR(dst, dst, fs_reg(32 - width))); - } - - dst = offset(dst, 1); - } -} - -/** - * Set up the gather channel based on the swizzle, for gather4. - */ -uint32_t -fs_visitor::gather_channel(ir_texture *ir, uint32_t sampler) -{ - const struct brw_sampler_prog_key_data *tex = - (stage == MESA_SHADER_FRAGMENT) ? - &((brw_wm_prog_key*) this->key)->tex : NULL; - assert(tex); - ir_constant *chan = ir->lod_info.component->as_constant(); - int swiz = GET_SWZ(tex->swizzles[sampler], chan->value.i[0]); - switch (swiz) { - case SWIZZLE_X: return 0; - case SWIZZLE_Y: - /* gather4 sampler is broken for green channel on RG32F -- - * we must ask for blue instead. - */ - if (tex->gather_channel_quirk_mask & (1<<sampler)) - return 2; - return 1; - case SWIZZLE_Z: return 2; - case SWIZZLE_W: return 3; - default: - unreachable("Not reached"); /* zero, one swizzles handled already */ - } -} - -/** - * Swizzle the result of a texture result. This is necessary for - * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. 
- */ -void -fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, uint32_t sampler) -{ - if (ir->op == ir_query_levels) { - /* # levels is in .w */ - this->result = offset(orig_val, 3); - return; - } - - this->result = orig_val; - - /* txs,lod don't actually sample the texture, so swizzling the result - * makes no sense. - */ - if (ir->op == ir_txs || ir->op == ir_lod || ir->op == ir_tg4) - return; - - const struct brw_sampler_prog_key_data *tex = - (stage == MESA_SHADER_FRAGMENT) ? - &((brw_wm_prog_key*) this->key)->tex : NULL; - assert(tex); - - if (ir->type == glsl_type::float_type) { - /* Ignore DEPTH_TEXTURE_MODE swizzling. */ - assert(ir->sampler->type->sampler_shadow); - } else if (tex->swizzles[sampler] != SWIZZLE_NOOP) { - fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type); - - for (int i = 0; i < 4; i++) { - int swiz = GET_SWZ(tex->swizzles[sampler], i); - fs_reg l = swizzled_result; - l = offset(l, i); - - if (swiz == SWIZZLE_ZERO) { - emit(MOV(l, fs_reg(0.0f))); - } else if (swiz == SWIZZLE_ONE) { - emit(MOV(l, fs_reg(1.0f))); - } else { - emit(MOV(l, offset(orig_val, - GET_SWZ(tex->swizzles[sampler], i)))); - } - } - this->result = swizzled_result; - } -} - -void -fs_visitor::visit(ir_swizzle *ir) +fs_inst * +fs_visitor::emit_texture(ir_texture *ir, const fs_reg &dst, + const fs_reg &coordinate, const fs_reg &shadow_c, + const fs_reg &lod, const fs_reg &lod2, + const fs_reg &offset_val, const fs_reg &sample_index, + const fs_reg &mcs, const fs_reg &sampler) { - ir->val->accept(this); - fs_reg val = this->result; - - if (ir->type->vector_elements == 1) { - this->result = offset(this->result, ir->mask.x); - return; - } - - fs_reg result = fs_reg(this, ir->type); - this->result = result; - - for (unsigned int i = 0; i < ir->type->vector_elements; i++) { - fs_reg channel = val; - int swiz = 0; - - switch (i) { - case 0: - swiz = ir->mask.x; - break; - case 1: - swiz = ir->mask.y; - break; - case 2: - swiz = ir->mask.z; - break; - case 3: - swiz = ir->mask.w; - break; - } - - emit(MOV(result, offset(channel, swiz))); - result = offset(result, 1); - } + if (brw->gen >= 7) + return emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, lod2, + offset_val, sample_index, mcs, sampler); + else if (brw->gen >= 5) + return emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, lod2, + sample_index, sampler); + else + return emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, lod2, + sampler); } void @@ -2187,8 +803,8 @@ fs_visitor::visit(ir_discard *ir) */ fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg, - BRW_CONDITIONAL_NZ)); + fs_inst *cmp = bld.CMP(bld.reg_null_f(), some_reg, some_reg, + BRW_CONDITIONAL_NZ); cmp->predicate = BRW_PREDICATE_NORMAL; cmp->flag_subreg = 1; @@ -2196,7 +812,7 @@ fs_visitor::visit(ir_discard *ir) /* For performance, after a discard, jump to the end of the shader. * Only jump if all relevant channels have been discarded. */ - fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP); + fs_inst *discard_jump = bld.emit(FS_OPCODE_DISCARD_JUMP); discard_jump->flag_subreg = 1; discard_jump->predicate = (dispatch_width == 8) @@ -2206,292 +822,6 @@ fs_visitor::visit(ir_discard *ir) } } -void -fs_visitor::visit(ir_constant *ir) -{ - /* Set this->result to reg at the bottom of the function because some code - * paths will cause this visitor to be applied to other fields. This will - * cause the value stored in this->result to be modified. 
- * - * Make reg constant so that it doesn't get accidentally modified along the - * way. Yes, I actually had this problem. :( - */ - const fs_reg reg(this, ir->type); - fs_reg dst_reg = reg; - - if (ir->type->is_array()) { - const unsigned size = type_size(ir->type->fields.array); - - for (unsigned i = 0; i < ir->type->length; i++) { - ir->array_elements[i]->accept(this); - fs_reg src_reg = this->result; - - dst_reg.type = src_reg.type; - for (unsigned j = 0; j < size; j++) { - emit(MOV(dst_reg, src_reg)); - src_reg = offset(src_reg, 1); - dst_reg = offset(dst_reg, 1); - } - } - } else if (ir->type->is_record()) { - foreach_in_list(ir_constant, field, &ir->components) { - const unsigned size = type_size(field->type); - - field->accept(this); - fs_reg src_reg = this->result; - - dst_reg.type = src_reg.type; - for (unsigned j = 0; j < size; j++) { - emit(MOV(dst_reg, src_reg)); - src_reg = offset(src_reg, 1); - dst_reg = offset(dst_reg, 1); - } - } - } else { - const unsigned size = type_size(ir->type); - - for (unsigned i = 0; i < size; i++) { - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(MOV(dst_reg, fs_reg(ir->value.f[i]))); - break; - case GLSL_TYPE_UINT: - emit(MOV(dst_reg, fs_reg(ir->value.u[i]))); - break; - case GLSL_TYPE_INT: - emit(MOV(dst_reg, fs_reg(ir->value.i[i]))); - break; - case GLSL_TYPE_BOOL: - emit(MOV(dst_reg, - fs_reg(ir->value.b[i] != 0 ? ctx->Const.UniformBooleanTrue - : 0))); - break; - default: - unreachable("Non-float/uint/int/bool constant"); - } - dst_reg = offset(dst_reg, 1); - } - } - - this->result = reg; -} - -void -fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) -{ - ir_expression *expr = ir->as_expression(); - - if (!expr || expr->operation == ir_binop_ubo_load) { - ir->accept(this); - - fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - } - - fs_reg op[3]; - fs_inst *inst; - - assert(expr->get_num_operands() <= 3); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); - - expr->operands[i]->accept(this); - op[i] = this->result; - - resolve_ud_negate(&op[i]); - } - - switch (expr->operation) { - case ir_unop_logic_not: - inst = emit(AND(reg_null_d, op[0], fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; - - case ir_binop_logic_xor: - if (ctx->Const.UniformBooleanTrue == 1) { - fs_reg dst = fs_reg(this, glsl_type::uint_type); - emit(XOR(dst, op[0], op[1])); - inst = emit(AND(reg_null_d, dst, fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } else { - inst = emit(XOR(reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_binop_logic_or: - if (ctx->Const.UniformBooleanTrue == 1) { - fs_reg dst = fs_reg(this, glsl_type::uint_type); - emit(OR(dst, op[0], op[1])); - inst = emit(AND(reg_null_d, dst, fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } else { - inst = emit(OR(reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_binop_logic_and: - if (ctx->Const.UniformBooleanTrue == 1) { - fs_reg dst = fs_reg(this, glsl_type::uint_type); - emit(AND(dst, op[0], op[1])); - inst = emit(AND(reg_null_d, dst, fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } else { - inst = emit(AND(reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_unop_f2b: - if (brw->gen >= 6) { - emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); - } 
else { - inst = emit(MOV(reg_null_f, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_unop_i2b: - if (brw->gen >= 6) { - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - } else { - inst = emit(MOV(reg_null_d, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - if (ctx->Const.UniformBooleanTrue == 1) { - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); - } - - emit(CMP(reg_null_d, op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - break; - - case ir_triop_csel: { - /* Expand the boolean condition into the flag register. */ - inst = emit(MOV(reg_null_d, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - /* Select which boolean to return. */ - fs_reg temp(this, expr->operands[1]->type); - inst = emit(SEL(temp, op[1], op[2])); - inst->predicate = BRW_PREDICATE_NORMAL; - - /* Expand the result to a condition code. */ - inst = emit(MOV(reg_null_d, temp)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - } - - default: - unreachable("not reached"); - } -} - -/** - * Emit a gen6 IF statement with the comparison folded into the IF - * instruction. - */ -void -fs_visitor::emit_if_gen6(ir_if *ir) -{ - ir_expression *expr = ir->condition->as_expression(); - - if (expr && expr->operation != ir_binop_ubo_load) { - fs_reg op[3]; - fs_inst *inst; - fs_reg temp; - - assert(expr->get_num_operands() <= 3); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); - - expr->operands[i]->accept(this); - op[i] = this->result; - } - - switch (expr->operation) { - case ir_unop_logic_not: - emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_Z)); - return; - - case ir_binop_logic_xor: - emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_logic_or: - temp = fs_reg(this, glsl_type::bool_type); - emit(OR(temp, op[0], op[1])); - emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_logic_and: - temp = fs_reg(this, glsl_type::bool_type); - emit(AND(temp, op[0], op[1])); - emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_unop_f2b: - inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - - case ir_unop_i2b: - emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - if (ctx->Const.UniformBooleanTrue == 1) { - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); - } - - emit(IF(op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - return; - - case ir_triop_csel: { - /* Expand the boolean condition into the flag register. */ - fs_inst *inst = emit(MOV(reg_null_d, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - /* Select which boolean to use as the result. 
*/ - fs_reg temp(this, expr->operands[1]->type); - inst = emit(SEL(temp, op[1], op[2])); - inst->predicate = BRW_PREDICATE_NORMAL; - - emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - } - - default: - unreachable("not reached"); - } - } - - ir->condition->accept(this); - emit(IF(this->result, fs_reg(0), BRW_CONDITIONAL_NZ)); -} - /** * Try to replace IF/MOV/ELSE/MOV/ENDIF with SEL. * @@ -2558,19 +888,19 @@ fs_visitor::try_replace_with_sel() if (src0.file == IMM) { src0 = fs_reg(this, glsl_type::float_type); src0.type = then_mov->src[0].type; - emit(MOV(src0, then_mov->src[0])); + bld.MOV(src0, then_mov->src[0]); } fs_inst *sel; if (if_inst->conditional_mod) { /* Sandybridge-specific IF with embedded comparison */ - emit(CMP(reg_null_d, if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod)); - sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]); + bld.CMP(bld.reg_null_d(), if_inst->src[0], if_inst->src[1], + if_inst->conditional_mod); + sel = bld.emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]); sel->predicate = BRW_PREDICATE_NORMAL; } else { /* Separate CMP and IF instructions */ - sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]); + sel = bld.emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]); sel->predicate = if_inst->predicate; sel->predicate_inverse = if_inst->predicate_inverse; } @@ -2578,165 +908,6 @@ fs_visitor::try_replace_with_sel() } void -fs_visitor::visit(ir_if *ir) -{ - if (brw->gen < 6) { - no16("Can't support (non-uniform) control flow on SIMD16\n"); - } - - /* Don't point the annotation at the if statement, because then it plus - * the then and else blocks get printed. - */ - this->base_ir = ir->condition; - - if (brw->gen == 6) { - emit_if_gen6(ir); - } else { - emit_bool_to_cond_code(ir->condition); - - emit(IF(BRW_PREDICATE_NORMAL)); - } - - foreach_in_list(ir_instruction, ir_, &ir->then_instructions) { - this->base_ir = ir_; - ir_->accept(this); - } - - if (!ir->else_instructions.is_empty()) { - emit(BRW_OPCODE_ELSE); - - foreach_in_list(ir_instruction, ir_, &ir->else_instructions) { - this->base_ir = ir_; - ir_->accept(this); - } - } - - emit(BRW_OPCODE_ENDIF); - - try_replace_with_sel(); -} - -void -fs_visitor::visit(ir_loop *ir) -{ - if (brw->gen < 6) { - no16("Can't support (non-uniform) control flow on SIMD16\n"); - } - - this->base_ir = NULL; - emit(BRW_OPCODE_DO); - - foreach_in_list(ir_instruction, ir_, &ir->body_instructions) { - this->base_ir = ir_; - ir_->accept(this); - } - - this->base_ir = NULL; - emit(BRW_OPCODE_WHILE); -} - -void -fs_visitor::visit(ir_loop_jump *ir) -{ - switch (ir->mode) { - case ir_loop_jump::jump_break: - emit(BRW_OPCODE_BREAK); - break; - case ir_loop_jump::jump_continue: - emit(BRW_OPCODE_CONTINUE); - break; - } -} - -void -fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir) -{ - ir_dereference *deref = static_cast<ir_dereference *>( - ir->actual_parameters.get_head()); - ir_variable *location = deref->variable_referenced(); - unsigned surf_index = (stage_prog_data->binding_table.abo_start + - location->data.binding); - - /* Calculate the surface offset */ - fs_reg offset(this, glsl_type::uint_type); - ir_dereference_array *deref_array = deref->as_dereference_array(); - - if (deref_array) { - deref_array->array_index->accept(this); - - fs_reg tmp(this, glsl_type::uint_type); - emit(MUL(tmp, this->result, fs_reg(ATOMIC_COUNTER_SIZE))); - emit(ADD(offset, tmp, fs_reg(location->data.atomic.offset))); - } else { - offset = fs_reg(location->data.atomic.offset); - } - 
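Before the removed code above emits the actual atomic operation, it locates the counter in its buffer: for an array dereference the byte offset is the run-time array index times the counter stride plus the declared binding offset, and the MUL/ADD pair builds exactly that expression in registers. A minimal scalar sketch of the same arithmetic, assuming ATOMIC_COUNTER_SIZE is the per-counter stride in bytes; the helper is hypothetical and only states the formula the emitted instructions evaluate:

/* Byte offset of counter[array_index] within its atomic buffer object.
 * The visitor emits MUL and ADD instructions for this because the index
 * is not generally known at compile time.
 */
static unsigned
atomic_counter_byte_offset(unsigned array_index, unsigned base_offset)
{
   return array_index * ATOMIC_COUNTER_SIZE + base_offset;
}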
- /* Emit the appropriate machine instruction */ - const char *callee = ir->callee->function_name(); - ir->return_deref->accept(this); - fs_reg dst = this->result; - - if (!strcmp("__intrinsic_atomic_read", callee)) { - emit_untyped_surface_read(surf_index, dst, offset); - - } else if (!strcmp("__intrinsic_atomic_increment", callee)) { - emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset, - fs_reg(), fs_reg()); - - } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { - emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset, - fs_reg(), fs_reg()); - } -} - -void -fs_visitor::visit(ir_call *ir) -{ - const char *callee = ir->callee->function_name(); - - if (!strcmp("__intrinsic_atomic_read", callee) || - !strcmp("__intrinsic_atomic_increment", callee) || - !strcmp("__intrinsic_atomic_predecrement", callee)) { - visit_atomic_counter_intrinsic(ir); - } else { - unreachable("Unsupported intrinsic."); - } -} - -void -fs_visitor::visit(ir_return *) -{ - unreachable("FINISHME"); -} - -void -fs_visitor::visit(ir_function *ir) -{ - /* Ignore function bodies other than main() -- we shouldn't see calls to - * them since they should all be inlined before we get to ir_to_mesa. - */ - if (strcmp(ir->name, "main") == 0) { - const ir_function_signature *sig; - exec_list empty; - - sig = ir->matching_signature(NULL, &empty, false); - - assert(sig); - - foreach_in_list(ir_instruction, ir_, &sig->body) { - this->base_ir = ir_; - ir_->accept(this); - } - } -} - -void -fs_visitor::visit(ir_function_signature *) -{ - unreachable("not reached"); -} - -void fs_visitor::visit(ir_emit_vertex *) { unreachable("not reached"); @@ -2748,129 +919,6 @@ fs_visitor::visit(ir_end_primitive *) unreachable("not reached"); } -void -fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, - fs_reg dst, fs_reg offset, fs_reg src0, - fs_reg src1) -{ - bool uses_kill = - (stage == MESA_SHADER_FRAGMENT) && - ((brw_wm_prog_data*) this->prog_data)->uses_kill; - int reg_width = dispatch_width / 8; - int length = 0; - - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4); - - sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - /* Initialize the sample mask in the message header. */ - emit(MOV(sources[0], fs_reg(0u))) - ->force_writemask_all = true; - - if (uses_kill) { - emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1))) - ->force_writemask_all = true; - } else { - emit(MOV(component(sources[0], 7), - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) - ->force_writemask_all = true; - } - length++; - - /* Set the atomic operation offset. */ - sources[1] = fs_reg(this, glsl_type::uint_type); - emit(MOV(sources[1], offset)); - length++; - - /* Set the atomic operation arguments. */ - if (src0.file != BAD_FILE) { - sources[length] = fs_reg(this, glsl_type::uint_type); - emit(MOV(sources[length], src0)); - length++; - } - - if (src1.file != BAD_FILE) { - sources[length] = fs_reg(this, glsl_type::uint_type); - emit(MOV(sources[length], src1)); - length++; - } - - int mlen = 1 + (length - 1) * reg_width; - fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), - BRW_REGISTER_TYPE_UD); - emit(LOAD_PAYLOAD(src_payload, sources, length)); - - /* Emit the instruction. 
*/ - fs_inst *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload, - fs_reg(atomic_op), fs_reg(surf_index)); - inst->mlen = mlen; -} - -void -fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, - fs_reg offset) -{ - bool uses_kill = - (stage == MESA_SHADER_FRAGMENT) && - ((brw_wm_prog_data*) this->prog_data)->uses_kill; - int reg_width = dispatch_width / 8; - - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2); - - sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - /* Initialize the sample mask in the message header. */ - emit(MOV(sources[0], fs_reg(0u))) - ->force_writemask_all = true; - - if (uses_kill) { - emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1))) - ->force_writemask_all = true; - } else { - emit(MOV(component(sources[0], 7), - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) - ->force_writemask_all = true; - } - - /* Set the surface read offset. */ - sources[1] = fs_reg(this, glsl_type::uint_type); - emit(MOV(sources[1], offset)); - - int mlen = 1 + reg_width; - fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), - BRW_REGISTER_TYPE_UD); - fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2)); - - /* Emit the instruction. */ - inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload, - fs_reg(surf_index)); - inst->mlen = mlen; -} - -fs_inst * -fs_visitor::emit(fs_inst *inst) -{ - if (force_uncompressed_stack > 0) - inst->exec_size = 8; - - if (dispatch_width == 16 && inst->exec_size == 8) - inst->force_uncompressed = true; - - inst->annotation = this->current_annotation; - inst->ir = this->base_ir; - - this->instructions.push_tail(inst); - - return inst; -} - -void -fs_visitor::emit(exec_list list) -{ - foreach_in_list_safe(fs_inst, inst, &list) { - inst->exec_node::remove(); - emit(inst); - } -} - /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ void fs_visitor::emit_dummy_fs() @@ -2878,13 +926,13 @@ fs_visitor::emit_dummy_fs() int reg_width = dispatch_width / 8; /* Everyone's favorite color. 
*/ - emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f))); - emit(MOV(fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f))); - emit(MOV(fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f))); - emit(MOV(fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)); + bld.MOV(fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f)); + bld.MOV(fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f)); + bld.MOV(fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f)); fs_inst *write; - write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); + write = bld.emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); write->base_mrf = 2; write->mlen = 4 * reg_width; write->eot = true; @@ -2911,16 +959,16 @@ fs_visitor::interp_reg(int location, int channel) void fs_visitor::emit_interpolation_setup_gen4() { - this->current_annotation = "compute pixel centers"; + bld.set_annotation("compute pixel centers"); this->pixel_x = fs_reg(this, glsl_type::uint_type); this->pixel_y = fs_reg(this, glsl_type::uint_type); this->pixel_x.type = BRW_REGISTER_TYPE_UW; this->pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(FS_OPCODE_PIXEL_X, this->pixel_x); - emit(FS_OPCODE_PIXEL_Y, this->pixel_y); + bld.emit(FS_OPCODE_PIXEL_X, this->pixel_x); + bld.emit(FS_OPCODE_PIXEL_Y, this->pixel_y); - this->current_annotation = "compute pixel deltas from v0"; + bld.set_annotation("compute pixel deltas from v0"); if (brw->has_pln) { this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = fs_reg(this, glsl_type::vec2_type); @@ -2932,24 +980,24 @@ fs_visitor::emit_interpolation_setup_gen4() this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = fs_reg(this, glsl_type::float_type); } - emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], - this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))))); - emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], - this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))))); + bld.ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], + this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))); + bld.ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], + this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); - this->current_annotation = "compute pos.w and 1/pos.w"; + bld.set_annotation("compute pos.w and 1/pos.w"); /* Compute wpos.w. It's always in our setup, since it's needed to * interpolate the other attributes. */ this->wpos_w = fs_reg(this, glsl_type::float_type); - emit(FS_OPCODE_LINTERP, wpos_w, + bld.emit(FS_OPCODE_LINTERP, wpos_w, this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], interp_reg(VARYING_SLOT_POS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = fs_reg(this, glsl_type::float_type); - emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); - this->current_annotation = NULL; + bld.emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); + bld.set_annotation(NULL); } /** Emits the interpolation for the varying inputs. */ @@ -2959,17 +1007,17 @@ fs_visitor::emit_interpolation_setup_gen6() struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); /* If the pixel centers end up used, the setup is the same as for gen4. 
*/ - this->current_annotation = "compute pixel centers"; + bld.set_annotation("compute pixel centers"); fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); int_pixel_x.type = BRW_REGISTER_TYPE_UW; int_pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(ADD(int_pixel_x, + bld.ADD(int_pixel_x, fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010)))); - emit(ADD(int_pixel_y, + fs_reg(brw_imm_v(0x10101010))); + bld.ADD(int_pixel_y, fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100)))); + fs_reg(brw_imm_v(0x11001100))); /* As of gen6, we can no longer mix float and int sources. We have * to turn the integer pixel centers into floats for their actual @@ -2977,13 +1025,13 @@ fs_visitor::emit_interpolation_setup_gen6() */ this->pixel_x = fs_reg(this, glsl_type::float_type); this->pixel_y = fs_reg(this, glsl_type::float_type); - emit(MOV(this->pixel_x, int_pixel_x)); - emit(MOV(this->pixel_y, int_pixel_y)); + bld.MOV(this->pixel_x, int_pixel_x); + bld.MOV(this->pixel_y, int_pixel_y); - this->current_annotation = "compute pos.w"; + bld.set_annotation("compute pos.w"); this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0)); this->wpos_w = fs_reg(this, glsl_type::float_type); - emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); + bld.emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) { uint8_t reg = payload.barycentric_coord_reg[i]; @@ -2991,7 +1039,7 @@ fs_visitor::emit_interpolation_setup_gen6() this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0)); } - this->current_annotation = NULL; + bld.set_annotation(NULL); } int @@ -3035,7 +1083,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) if (colors_enabled & (1 << i)) { dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8), color.type, color.width); - inst = emit(MOV(dst[len], offset(color, i))); + inst = bld.MOV(dst[len], offset(color, i)); inst->saturate = key->clamp_fragment_color; } else if (color.width == 16) { /* We need two BAD_FILE slots for a 16-wide color */ @@ -3058,11 +1106,11 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) for (unsigned i = 0; i < 4; ++i) { if (colors_enabled & (1 << i)) { dst[i] = fs_reg(GRF, alloc.allocate(1), color.type); - inst = emit(MOV(dst[i], half(offset(color, i), 0))); + inst = bld.MOV(dst[i], half(offset(color, i), 0)); inst->saturate = key->clamp_fragment_color; dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type); - inst = emit(MOV(dst[i + 4], half(offset(color, i), 1))); + inst = bld.MOV(dst[i + 4], half(offset(color, i), 1)); inst->saturate = key->clamp_fragment_color; inst->force_sechalf = true; } @@ -3101,7 +1149,7 @@ fs_visitor::emit_alpha_test() { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - this->current_annotation = "Alpha test"; + bld.set_annotation("Alpha test"); fs_inst *cmp; if (key->alpha_test_func == GL_ALWAYS) @@ -3111,15 +1159,15 @@ fs_visitor::emit_alpha_test() /* f0.1 = 0 */ fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - cmp = emit(CMP(reg_null_f, some_reg, some_reg, - BRW_CONDITIONAL_NEQ)); + cmp = bld.CMP(bld.reg_null_f(), some_reg, some_reg, + BRW_CONDITIONAL_NEQ); } else { /* RT0 alpha */ fs_reg color = offset(outputs[0], 3); /* f0.1 &= func(color, ref) */ - cmp = emit(CMP(reg_null_f, color, fs_reg(key->alpha_test_ref), - 
cond_for_alpha_func(key->alpha_test_func))); + cmp = bld.CMP(bld.reg_null_f(), color, fs_reg(key->alpha_test_ref), + cond_for_alpha_func(key->alpha_test_func)); } cmp->predicate = BRW_PREDICATE_NORMAL; cmp->flag_subreg = 1; @@ -3133,7 +1181,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - this->current_annotation = "FB write header"; + bld.set_annotation("FB write header"); bool header_present = true; int reg_size = dispatch_width / 8; @@ -3163,22 +1211,22 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (payload.aa_dest_stencil_reg) { sources[length] = fs_reg(GRF, alloc.allocate(1)); - emit(MOV(sources[length], - fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)))); + bld.MOV(sources[length], + fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))); length++; } prog_data->uses_omask = prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); if (prog_data->uses_omask) { - this->current_annotation = "FB write oMask"; + bld.set_annotation("FB write oMask"); assert(this->sample_mask.file != BAD_FILE); /* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since * it's unsigned single words, one vgrf is always 16-wide. */ sources[length] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UW, 16); - emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); + bld.emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); length++; } @@ -3192,7 +1240,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (src0_alpha.file != BAD_FILE) { sources[length] = fs_reg(GRF, alloc.allocate(reg_size), src0_alpha.type, src0_alpha.width); - fs_inst *inst = emit(MOV(sources[length], src0_alpha)); + fs_inst *inst = bld.MOV(sources[length], src0_alpha); inst->saturate = key->clamp_fragment_color; length++; } @@ -3217,19 +1265,19 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { /* Hand over gl_FragDepth. */ assert(this->frag_depth.file != BAD_FILE); - emit(MOV(sources[length], this->frag_depth)); + bld.MOV(sources[length], this->frag_depth); } else { /* Pass through the payload depth.
*/ - emit(MOV(sources[length], - fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)))); + bld.MOV(sources[length], + fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))); } length++; } if (payload.dest_depth_reg) { sources[length] = fs_reg(this, glsl_type::float_type); - emit(MOV(sources[length], - fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)))); + bld.MOV(sources[length], + fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0))); length++; } @@ -3238,16 +1286,16 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (brw->gen >= 7) { /* Send from the GRF */ fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F); - load = emit(LOAD_PAYLOAD(payload, sources, length)); + load = bld.LOAD_PAYLOAD(payload, sources, length); payload.reg = alloc.allocate(load->regs_written); load->dst = payload; - write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload); + write = bld.emit(FS_OPCODE_FB_WRITE, reg_undef, payload); write->base_mrf = -1; } else { /* Send from the MRF */ - load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F), - sources, length)); - write = emit(FS_OPCODE_FB_WRITE); + load = bld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F), + sources, length); + write = bld.emit(FS_OPCODE_FB_WRITE); write->base_mrf = 1; } @@ -3278,17 +1326,17 @@ fs_visitor::emit_fb_writes() if (INTEL_DEBUG & DEBUG_SHADER_TIME) emit_shader_time_end(); - this->current_annotation = ralloc_asprintf(this->mem_ctx, - "FB dual-source write"); + bld.set_annotation(ralloc_asprintf(this->mem_ctx, + "FB dual-source write")); inst = emit_single_fb_write(this->outputs[0], this->dual_src_output, reg_undef, 4); inst->target = 0; prog_data->dual_src_blend = true; } else if (key->nr_color_regions > 0) { for (int target = 0; target < key->nr_color_regions; target++) { - this->current_annotation = ralloc_asprintf(this->mem_ctx, + bld.set_annotation(ralloc_asprintf(this->mem_ctx, "FB write target %d", - target); + target)); fs_reg src0_alpha; if (brw->gen >= 6 && key->replicate_alpha && target != 0) src0_alpha = offset(outputs[0], 3); @@ -3315,32 +1363,7 @@ fs_visitor::emit_fb_writes() } inst->eot = true; - this->current_annotation = NULL; -} - -void -fs_visitor::resolve_ud_negate(fs_reg *reg) -{ - if (reg->type != BRW_REGISTER_TYPE_UD || - !reg->negate) - return; - - fs_reg temp = fs_reg(this, glsl_type::uint_type); - emit(MOV(temp, *reg)); - *reg = temp; -} - -void -fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) -{ - assert(ctx->Const.UniformBooleanTrue == 1); - - if (rvalue->type != glsl_type::bool_type) - return; - - fs_reg temp = fs_reg(this, glsl_type::bool_type); - emit(AND(temp, *reg, fs_reg(1))); - *reg = temp; + bld.set_annotation(NULL); } fs_visitor::fs_visitor(struct brw_context *brw, @@ -3350,59 +1373,53 @@ fs_visitor::fs_visitor(struct brw_context *brw, struct gl_shader_program *shader_prog, struct gl_fragment_program *fp, unsigned dispatch_width) - : backend_visitor(brw, shader_prog, &fp->Base, &prog_data->base, - MESA_SHADER_FRAGMENT), - reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), - reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), - reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), + : backend_visitor(brw, shader_prog, &fp->Base, &prog_data->base, mem_ctx, + MESA_SHADER_FRAGMENT, INTEL_DEBUG & DEBUG_WM, + prog_data->uses_kill, + brw::fs_builder(brw, mem_ctx, alloc, instructions, + dispatch_width), + (dispatch_width == 16 ? 
ST_FS16 : ST_FS8), + prog_data->base.nr_params), key(key), prog_data(&prog_data->base), dispatch_width(dispatch_width) { - this->mem_ctx = mem_ctx; init(); } void fs_visitor::init() { - this->failed = false; this->simd16_unsupported = false; this->no16_msg = NULL; - this->variable_ht = hash_table_ctor(0, - hash_table_pointer_hash, - hash_table_pointer_compare); memset(&this->payload, 0, sizeof(this->payload)); memset(this->outputs, 0, sizeof(this->outputs)); memset(this->output_components, 0, sizeof(this->output_components)); this->source_depth_to_render_target = false; this->runtime_check_aads_emit = false; - this->first_non_payload_grf = 0; - this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; - - this->current_annotation = NULL; - this->base_ir = NULL; - this->virtual_grf_start = NULL; - this->virtual_grf_end = NULL; this->live_intervals = NULL; this->regs_live_at_ip = NULL; - this->uniforms = 0; this->last_scratch = 0; this->pull_constant_loc = NULL; this->push_constant_loc = NULL; - this->force_uncompressed_stack = 0; - this->spilled_any_registers = false; this->do_dual_src = false; - - if (dispatch_width == 8) - this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params); } -fs_visitor::~fs_visitor() +fs_reg +fs_visitor::emit_untyped_surface_header() { - hash_table_dtor(this->variable_ht); + using namespace brw; + const fs_reg payload = half(bld.natural_reg(BRW_REGISTER_TYPE_UD), 0); + const fs_reg sample_mask = + (uses_kill ? brw_flag_reg(0, 1) : + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)); + + exec_all(exec_half(0, bld.MOV(payload, fs_reg(0u)))); + exec_all(bld.MOV(component(payload, 7), sample_mask)); + + return payload; } diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index daedb35a88c..31582635056 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -47,7 +47,6 @@ public: fs_reg(fs_visitor *v, const struct glsl_type *type); bool equals(const fs_reg &r) const; - bool is_valid_3src() const; bool is_contiguous() const; fs_reg &apply_stride(unsigned stride); @@ -82,6 +81,18 @@ public: uint8_t stride; }; +namespace brw { + template<> + struct reg_traits<fs_reg> { + typedef fs_reg src_reg; + typedef fs_reg dst_reg; + + static const unsigned alloc_size = 1; + static const bool allows_swizzle = false; + static const bool allows_writemask = false; + }; +} + static inline fs_reg byte_offset(fs_reg reg, unsigned delta) { diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 2d5610b712d..cd495e8cb5f 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -30,7 +30,6 @@ namespace brw { -class vec4_visitor; class dst_reg; class src_reg : public backend_reg diff --git a/src/mesa/drivers/dri/i965/brw_ir_visitor.cpp b/src/mesa/drivers/dri/i965/brw_ir_visitor.cpp new file mode 100644 index 00000000000..3e67aeda0af --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_ir_visitor.cpp @@ -0,0 +1,190 @@ +/* + * Copyright © 2010-2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_ir_visitor.h" +#include "brw_cfg.h" + +using namespace brw; + +base_visitor::base_visitor(struct brw_context *brw, + struct gl_shader_program *shader_prog, + struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + void *mem_ctx, + gl_shader_stage stage, + bool debug_flag, + unsigned uniform_array_size) + : brw(brw), + ctx(&brw->ctx), + shader(shader_prog ? + (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL), + shader_prog(shader_prog), + prog(prog), + stage_prog_data(stage_prog_data), + mem_ctx(mem_ctx), + cfg(NULL), + stage(stage), + fail_msg(NULL), + debug_flag(debug_flag), + failed(false), + first_non_payload_grf(0), + max_grf(brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF), + virtual_grf_start(NULL), + virtual_grf_end(NULL), + uniforms(0), + uniform_size(rzalloc_array(mem_ctx, int, uniform_array_size)), + uniform_vector_size(rzalloc_array(mem_ctx, int, uniform_array_size)), + uniform_array_size(uniform_array_size) +{ +} + +void +base_visitor::dump_instructions() +{ + dump_instructions(NULL); +} + +void +base_visitor::dump_instructions(const char *name) +{ + FILE *file = stderr; + if (name && geteuid() != 0) { + file = fopen(name, "w"); + if (!file) + file = stderr; + } + + int ip = 0; + foreach_block_and_inst(block, backend_instruction, inst, cfg) { + if (!name) + fprintf(stderr, "%d: ", ip++); + dump_instruction(inst, file); + } + + if (file != stderr) { + fclose(file); + } +} + +void +base_visitor::calculate_cfg() +{ + if (this->cfg) + return; + cfg = new(mem_ctx) cfg_t(&this->instructions); +} + +void +base_visitor::invalidate_cfg() +{ + ralloc_free(this->cfg); + this->cfg = NULL; +} + +/** + * Sets up the starting offsets for the groups of binding table entries + * common to all pipeline stages. + * + * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're + * unused but also make sure that addition of small offsets to them will + * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
+ */ +void +base_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset) +{ + int num_textures = _mesa_fls(prog->SamplersUsed); + + stage_prog_data->binding_table.texture_start = next_binding_table_offset; + next_binding_table_offset += num_textures; + + if (shader) { + stage_prog_data->binding_table.ubo_start = next_binding_table_offset; + next_binding_table_offset += shader->base.NumUniformBlocks; + } else { + stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0; + } + + if (INTEL_DEBUG & DEBUG_SHADER_TIME) { + stage_prog_data->binding_table.shader_time_start = next_binding_table_offset; + next_binding_table_offset++; + } else { + stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0; + } + + if (prog->UsesGather) { + if (brw->gen >= 8) { + stage_prog_data->binding_table.gather_texture_start = + stage_prog_data->binding_table.texture_start; + } else { + stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset; + next_binding_table_offset += num_textures; + } + } else { + stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; + } + + if (shader_prog && shader_prog->NumAtomicBuffers) { + stage_prog_data->binding_table.abo_start = next_binding_table_offset; + next_binding_table_offset += shader_prog->NumAtomicBuffers; + } else { + stage_prog_data->binding_table.abo_start = 0xd0d0d0d0; + } + + /* This may or may not be used depending on how the compile goes. */ + stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset; + next_binding_table_offset++; + + assert(next_binding_table_offset <= BRW_MAX_SURFACES); + + /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */ +} + +void +base_visitor::vfail(const char *format, va_list va) +{ + char *msg; + + if (failed) + return; + + failed = true; + + msg = ralloc_vasprintf(mem_ctx, format, va); + msg = ralloc_asprintf(mem_ctx, "compile failed: %s\n", msg); + + this->fail_msg = msg; + + if (debug_flag) { + fprintf(stderr, "%s", msg); + } +} + +void +base_visitor::fail(const char *format, ...) +{ + va_list va; + + va_start(va, format); + vfail(format, va); + va_end(va); +} diff --git a/src/mesa/drivers/dri/i965/brw_ir_visitor.h b/src/mesa/drivers/dri/i965/brw_ir_visitor.h new file mode 100644 index 00000000000..876f162b91e --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_ir_visitor.h @@ -0,0 +1,2353 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010-2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_IR_VISITOR_H +#define BRW_IR_VISITOR_H + +#include "brw_ir_builder.h" +#include "brw_program.h" +#include "program/hash_table.h" +#include "glsl/ir_uniform.h" + +extern "C" { +#include "program/sampler.h" +} + +namespace brw { + +class base_visitor : public ir_visitor { +protected: + base_visitor(struct brw_context *brw, + struct gl_shader_program *shader_prog, + struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + void *mem_ctx, + gl_shader_stage stage, + bool debug_flag, + unsigned uniform_array_size); + +public: + struct brw_context *const brw; + struct gl_context *const ctx; + struct brw_shader *const shader; + struct gl_shader_program *const shader_prog; + struct gl_program *const prog; + struct brw_stage_prog_data *const stage_prog_data; + + /** ralloc context for temporary data used during compile */ + void *mem_ctx; + + /** + * List of either fs_inst or vec4_instruction (inheriting from + * backend_instruction) + */ + exec_list instructions; + + cfg_t *cfg; + + gl_shader_stage stage; + + virtual void dump_instruction(backend_instruction *inst) = 0; + virtual void dump_instruction(backend_instruction *inst, FILE *file) = 0; + virtual void dump_instructions(); + virtual void dump_instructions(const char *name); + + void calculate_cfg(); + void invalidate_cfg(); + + void assign_common_binding_table_offsets(uint32_t next_binding_table_offset); + + virtual void invalidate_live_intervals() = 0; + + void vfail(const char *msg, va_list args); + void fail(const char *msg, ...); + + char *fail_msg; + bool debug_flag; + bool failed; + + int first_non_payload_grf; + /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */ + unsigned max_grf; + int *virtual_grf_start; + int *virtual_grf_end; + + /** Number of uniform variable components visited. 
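+ * Counted in allocation slots: the visitor bumps this by
+ * CEILING(n, alloc_size) per vector written, so (assuming alloc_size is
+ * 4 in the vec4 backend and 1 in the scalar backend) a vec2 costs one
+ * slot in the former and two in the latter.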
*/ + unsigned uniforms; + int *uniform_size; + int *uniform_vector_size; + unsigned uniform_array_size; /*< Size of uniform_[vector_]size arrays */ +}; + +template<typename V, typename B> +class backend_visitor : public base_visitor { +protected: + typedef typename B::src_reg src_reg; + typedef typename B::dst_reg dst_reg; + typedef typename B::vector_builder::src_reg src_vector; + typedef typename B::vector_builder::dst_reg dst_vector; + typedef typename B::instruction instruction; + + V & + self() { + return static_cast<V &>(*this); + } + + backend_visitor(struct brw_context *brw, + struct gl_shader_program *shader_prog, + struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + void *mem_ctx, + gl_shader_stage stage, + bool debug_flag, + bool uses_kill, + const B &bld, + shader_time_shader_type st_type, + unsigned uniform_array_size) : + base_visitor(brw, shader_prog, prog, stage_prog_data, mem_ctx, stage, + debug_flag, uniform_array_size), + variable_ht(hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare)), + bld(bld), uses_kill(uses_kill), st_type(st_type) + { + } + + ~backend_visitor() + { + hash_table_dtor(this->variable_ht); + } + + src_reg + visit_result(ir_instruction *ir) + { + ir->accept(this); + assert(this->result.file != BAD_FILE); + return this->result; + } + + unsigned + emit_constant_values(const dst_reg &dst, ir_constant *ir) + { + unsigned size = 0; + + if (ir->type->is_record()) { + foreach_in_list(ir_constant, field_value, &ir->components) + size += emit_constant_values(offset(dst, size), field_value); + + } else if (ir->type->is_array()) { + for (unsigned i = 0; i < ir->type->length; i++) + size += emit_constant_values(offset(dst, size), + ir->array_elements[i]); + + } else { + const int n = ir->type->vector_elements; + typename B::vector_builder vbld = bld.vector(); + + for (int j = 0; j < ir->type->matrix_columns; j++) { + dst_vector tmp = retype(offset(dst_vector_n(dst, 4), size), + brw_type_for_base_type(ir->type)); + unsigned mask = (1 << n) - 1; + + while (mask) { + const int i = ffs(mask) - 1; + + tmp.writemask = 1 << i; + + /* Find other components that match the one we're about to + * write. Emits fewer instructions for things like vec4(0.5, + * 1.5, 1.5, 1.5). + */ + for (int k = i + 1; k < n; k++) { + if (ir->type->base_type == GLSL_TYPE_BOOL) { + if (ir->value.b[j * n + i] == ir->value.b[j * n + k]) + tmp.writemask |= 1 << k; + } else { + /* u, i, and f storage all line up, so no need for a + * switch case for comparing each type. + */ + if (ir->value.u[j * n + i] == ir->value.u[j * n + k]) + tmp.writemask |= 1 << k; + } + } + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + vbld.MOV(tmp, src_reg(ir->value.f[j * n + i])); + break; + case GLSL_TYPE_INT: + vbld.MOV(tmp, src_reg(ir->value.i[j * n + i])); + break; + case GLSL_TYPE_UINT: + vbld.MOV(tmp, src_reg(ir->value.u[j * n + i])); + break; + case GLSL_TYPE_BOOL: + vbld.MOV(tmp, src_reg(ir->value.b[j * n + i] ? 
+ ctx->Const.UniformBooleanTrue : 0)); + break; + default: + unreachable("Non-float/uint/int/bool constant"); + } + + mask &= ~tmp.writemask; + } + + size += CEILING(n, alloc_size); + } + } + + return size; + } + + void + visit(ir_constant *ir) + { + dst_reg dst = self().temporary_reg(ir->type); + emit_constant_values(dst, ir); + this->result = src_reg(dst); + } + + dst_reg * + variable_storage(ir_variable *var) + { + return (dst_reg *)hash_table_find(this->variable_ht, var); + } + + /* Our support for builtin uniforms is even scarier than non-builtin. + * It sits on top of the PROG_STATE_VAR parameters that are + * automatically updated from GL context state. + */ + void + setup_builtin_uniform_values(ir_variable *ir) + { + const ir_state_slot *const slots = ir->get_state_slots(); + + for (unsigned i = 0; i < ir->get_num_state_slots(); i++) { + /* This state reference has already been setup by ir_to_mesa, but we'll + * get the same index back here. + */ + int index = _mesa_add_state_reference(this->prog->Parameters, + (gl_state_index *)slots[i].tokens); + gl_constant_value *values = prog->Parameters->ParameterValues[index]; + const unsigned n = size_for_swizzle( + from_glsl_swizzle(WRITEMASK_XYZW, slots[i].swizzle)); + + /* Add each of the unique swizzles of the element as a parameter. + * This'll end up matching the expected layout of the + * array/matrix/structure we're trying to fill in. + */ + for (unsigned j = 0; j < MAX2(n, alloc_size); j++) + stage_prog_data->param[uniforms * alloc_size + j] = + &values[GET_SWZ(slots[i].swizzle, j)]; + + uniform_vector_size[uniforms] = n; + uniforms += CEILING(n, alloc_size); + } + } + + /* Our support for uniforms is piggy-backed on the struct + * gl_fragment_program, because that's where the values actually + * get stored, rather than in some global gl_shader_program uniform + * store. + */ + void + setup_uniform_values(ir_variable *ir) + { + int namelen = strlen(ir->name); + + /* The data for our (non-builtin) uniforms is stored in a series of + * gl_uniform_driver_storage structs for each subcomponent that + * glGetUniformLocation() could name. We know it's been set up in the same + * order we'd walk the type, so walk the list of storage and find anything + * with our name, or the prefix of a component that starts with our name. + */ + for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) { + struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; + + if (strncmp(ir->name, storage->name, namelen) != 0 || + (storage->name[namelen] != 0 && + storage->name[namelen] != '.' 
&& + storage->name[namelen] != '[')) { + continue; + } + + gl_constant_value *components = storage->storage; + unsigned vector_count = (MAX2(storage->array_elements, 1) * + storage->type->matrix_columns); + + for (unsigned s = 0; s < vector_count; s++) { + unsigned i; + assert(uniforms < uniform_array_size); + + for (i = 0; i < storage->type->vector_elements; i++) { + stage_prog_data->param[uniforms * alloc_size + i] = + &components[s * storage->type->vector_elements + i]; + } + for (; i < alloc_size; i++) { + static const gl_constant_value zero = { 0.0 }; + stage_prog_data->param[uniforms * alloc_size + i] = &zero; + } + + uniform_vector_size[uniforms] = storage->type->vector_elements; + uniforms += CEILING(storage->type->vector_elements, alloc_size); + } + } + } + + unsigned + type_vector_size(const struct glsl_type *type) + { + if (type->is_scalar() || type->is_vector() || type->is_matrix()) + return type->vector_elements; + else + return 4; + } + + void + visit(ir_variable *ir) + { + dst_reg *reg = NULL; + + if (variable_storage(ir)) + return; + + if (ir->data.mode == ir_var_auto || + ir->data.mode == ir_var_temporary) { + reg = new(mem_ctx) dst_reg(self().temporary_reg(ir->type)); + + } else if (ir->data.mode == ir_var_uniform) { + /* Thanks to the lower_ubo_reference pass, we will see only + * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO + * variables, so no need for them to be in variable_ht. + * + * Some uniforms, such as samplers and atomic counters, have no actual + * storage, so we should ignore them. + */ + if (ir->is_in_uniform_block() || type_size(ir->type) == 0) + return; + + if (bld.dispatch_width() == 16) { + fail("Failed to find uniform '%s' in SIMD16\n", ir->name); + return; + } + + reg = new(mem_ctx) dst_reg( + resize(retype(dst_reg(UNIFORM, this->uniforms), + brw_type_for_base_type(ir->type)), + type_vector_size(ir->type))); + + /* Track how big the whole uniform variable is, in case we need to put a + * copy of its data into pull constants for array access. + */ + assert(this->uniforms < uniform_array_size); + this->uniform_size[this->uniforms] = type_size(ir->type); + + if (!strncmp(ir->name, "gl_", 3)) { + setup_builtin_uniform_values(ir); + } else { + setup_uniform_values(ir); + } + + } else { + unreachable("not reached"); + } + + hash_table_insert(this->variable_ht, reg, ir); + } + + /** Walks an exec_list of ir_instruction and sends it through this visitor. 
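+ * Each top-level IR node is also installed as the builder's base_ir
+ * annotation, so the backend instructions emitted on its behalf can be
+ * traced back to it in the debug dumps.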
*/ + void + visit_instructions(const exec_list *list) + { + foreach_in_list(ir_instruction, ir, list) { + bld.set_base_ir(ir); + ir->accept(this); + } + } + + void + resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg) + { + assert(ctx->Const.UniformBooleanTrue == 1); + + if (rvalue->type != glsl_type::bool_type) + return; + + dst_reg temp = bld.natural_reg(BRW_REGISTER_TYPE_D); + bld.AND(temp, *reg, src_reg(1)); + *reg = src_reg(temp); + } + + void + visit(ir_dereference_variable *ir) + { + dst_reg *reg = variable_storage(ir->var); + + if (!reg) { + fail("Failed to find variable storage for %s\n", ir->var->name); + this->result = src_reg(bld.reg_null_d()); + return; + } + + this->result = resize(src_reg(*reg), type_vector_size(ir->type)); + } + + void + visit(ir_dereference_record *ir) + { + const glsl_type *struct_type = ir->record->type; + unsigned off = 0; + + ir->record->accept(this); + + for (unsigned i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + off += type_size(struct_type->fields.structure[i].type); + } + + this->result = retype(resize(offset(this->result, off), + type_vector_size(ir->type)), + brw_type_for_base_type(ir->type)); + } + + virtual unsigned + get_array_stride(ir_dereference_array *ir) + { + /* Under normal circumstances array elements are stored consecutively, so + * the stride is equal to the size of the array element. + */ + return type_size(ir->type); + } + + void + visit(ir_dereference_array *ir) + { + ir_constant *constant_index = ir->array_index->constant_expression_value(); + src_reg src = retype(visit_result(ir->array), + brw_type_for_base_type(ir->type)); + + if (constant_index) { + src = offset(src, constant_index->value.i[0] * get_array_stride(ir)); + } else { + /* Variable index array dereference. We attach the variable index + * component to the reg as a pointer to a register containing the + * offset. Currently only uniform arrays are supported in this + * patch, and that reladdr pointer is resolved by + * move_uniform_array_access_to_pull_constants(). All other array + * types are lowered by lower_variable_index_to_cond_assign(). + */ + src_reg index_reg = visit_result(ir->array_index); + + if (get_array_stride(ir) != 1) { + dst_reg tmp = bld.scalar_reg(BRW_REGISTER_TYPE_D); + bld.MUL(tmp, index_reg, src_reg(get_array_stride(ir))); + index_reg = src_reg(tmp); + } + + if (src.reladdr) { + dst_reg tmp = bld.scalar_reg(BRW_REGISTER_TYPE_D); + bld.ADD(tmp, index_reg, *src.reladdr); + index_reg = src_reg(tmp); + } + + src.reladdr = new(mem_ctx) src_reg(index_reg); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + this->result = resize(src, type_vector_size(ir->type)); + } + + /** + * Emit a gen6 IF statement with the comparison folded into the IF + * instruction. 
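+ *
+ * As a sketch (register names hypothetical): for "if (a < b)" this lets
+ * us emit the single instruction
+ *
+ *    IF.l null, a, b
+ *
+ * instead of the CMP + predicated IF pair that the generic
+ * emit_bool_to_cond_code() path produces.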
+ */ + void + emit_if_gen6(ir_if *ir) + { + ir_expression *expr = ir->condition->as_expression(); + + if (expr && expr->operation != ir_binop_ubo_load) { + bool is_scalar = true; + src_reg op[3]; + + assert(expr->get_num_operands() <= 3); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + is_scalar &= expr->operands[i]->type->is_scalar(); + op[i] = visit_result(expr->operands[i]); + } + + switch (expr->operation) { + case ir_unop_logic_not: + bld.IF(op[0], src_reg(0), BRW_CONDITIONAL_Z); + return; + + case ir_binop_logic_xor: + bld.IF(op[0], op[1], BRW_CONDITIONAL_NZ); + return; + + case ir_binop_logic_or: { + dst_reg temp = bld.scalar_reg(BRW_REGISTER_TYPE_D); + bld.OR(temp, op[0], op[1]); + bld.IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ); + return; + } + case ir_binop_logic_and: { + dst_reg temp = bld.scalar_reg(BRW_REGISTER_TYPE_D); + bld.AND(temp, op[0], op[1]); + bld.IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ); + return; + } + case ir_unop_f2b: + exec_condmod(BRW_CONDITIONAL_NZ, + bld.emit(BRW_OPCODE_IF, bld.reg_null_f(), + op[0], src_reg(0))); + return; + + case ir_unop_i2b: + bld.IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ); + return; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_nequal: + case ir_binop_all_equal: + case ir_binop_any_nequal: + if (ctx->Const.UniformBooleanTrue == 1) { + resolve_bool_comparison(expr->operands[0], &op[0]); + resolve_bool_comparison(expr->operands[1], &op[1]); + } + + if (is_scalar) { + bld.IF(op[0], op[1], + brw_conditional_for_comparison(expr->operation)); + } else { + bld.CMP(bld.reg_null_d(), op[0], op[1], + brw_conditional_for_comparison(expr->operation)); + bld.IF(expr->operation == ir_binop_all_equal ? + BRW_PREDICATE_ALIGN16_ALL4H : + BRW_PREDICATE_ALIGN16_ANY4H); + } + return; + + case ir_unop_any: + assert(!is_scalar); + bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ); + bld.IF(BRW_PREDICATE_ALIGN16_ANY4H); + return; + + case ir_triop_csel: { + /* Expand the boolean condition into the flag register. */ + exec_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(bld.reg_null_d(), op[0])); + + /* Select which boolean to return. 
*/ + dst_reg temp = bld.scalar_reg(op[1].type); + exec_predicate(BRW_PREDICATE_NORMAL, + bld.emit(BRW_OPCODE_SEL, temp, op[1], op[2])); + bld.IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ); + return; + } + default: + unreachable("not reached"); + } + } + + bld.IF(visit_result(ir->condition), src_reg(0), BRW_CONDITIONAL_NZ); + } + + enum brw_predicate + emit_bool_to_cond_code(ir_rvalue *ir) + { + ir_expression *expr = ir->as_expression(); + enum brw_predicate predicate = BRW_PREDICATE_NORMAL; + + if (expr && expr->operation != ir_binop_ubo_load) { + bool is_scalar = true; + src_reg op[3]; + + assert(expr->get_num_operands() <= 3); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + is_scalar &= expr->operands[i]->type->is_scalar(); + op[i] = bld.fix_condmod_negate(visit_result(expr->operands[i])); + } + + switch (expr->operation) { + case ir_unop_logic_not: + exec_condmod(BRW_CONDITIONAL_Z, + bld.AND(bld.reg_null_d(), op[0], src_reg(1))); + break; + + case ir_binop_logic_xor: + if (ctx->Const.UniformBooleanTrue == 1) { + dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD); + bld.XOR(dst, op[0], op[1]); + exec_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.reg_null_d(), src_reg(dst), src_reg(1))); + } else { + exec_condmod(BRW_CONDITIONAL_NZ, + bld.XOR(bld.reg_null_d(), op[0], op[1])); + } + break; + + case ir_binop_logic_or: + if (ctx->Const.UniformBooleanTrue == 1) { + dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD); + bld.OR(dst, op[0], op[1]); + exec_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.reg_null_d(), src_reg(dst), src_reg(1))); + } else { + exec_condmod(BRW_CONDITIONAL_NZ, + bld.OR(bld.reg_null_d(), op[0], op[1])); + } + break; + + case ir_binop_logic_and: + if (ctx->Const.UniformBooleanTrue == 1) { + dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD); + bld.AND(dst, op[0], op[1]); + exec_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.reg_null_d(), src_reg(dst), src_reg(1))); + } else { + exec_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.reg_null_d(), op[0], op[1])); + } + break; + + case ir_unop_f2b: + if (brw->gen >= 6) + bld.CMP(bld.reg_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ); + else + exec_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(bld.reg_null_f(), op[0])); + break; + + case ir_unop_i2b: + if (brw->gen >= 6) + bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ); + else + exec_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(bld.reg_null_d(), op[0])); + break; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_nequal: + case ir_binop_all_equal: + case ir_binop_any_nequal: + if (ctx->Const.UniformBooleanTrue == 1) { + resolve_bool_comparison(expr->operands[0], &op[0]); + resolve_bool_comparison(expr->operands[1], &op[1]); + } + + bld.CMP(bld.reg_null_d(), op[0], op[1], + brw_conditional_for_comparison(expr->operation)); + + if (!is_scalar) + predicate = (expr->operation == ir_binop_all_equal ? + BRW_PREDICATE_ALIGN16_ALL4H : + BRW_PREDICATE_ALIGN16_ANY4H); + break; + + case ir_unop_any: + assert(!is_scalar); + bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ); + predicate = BRW_PREDICATE_ALIGN16_ANY4H; + break; + + case ir_triop_csel: { + /* Expand the boolean condition into the flag register. */ + exec_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(bld.reg_null_d(), op[0])); + + /* Select which boolean to return. 
*/ + dst_reg temp = bld.natural_reg(op[1].type); + exec_predicate(BRW_PREDICATE_NORMAL, + bld.SEL(temp, op[1], op[2])); + + /* Expand the result to a condition code. */ + exec_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(bld.reg_null_d(), src_reg(temp))); + break; + } + + default: + unreachable("not reached"); + } + } else { + exec_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.reg_null_d(), visit_result(ir), src_reg(1))); + } + + return predicate; + } + + void + visit(ir_if *ir) + { + /* Don't point the annotation at the if statement, because then it plus + * the then and else blocks get printed. + */ + bld.set_base_ir(ir->condition); + + if (brw->gen == 6) { + emit_if_gen6(ir); + } else { + bld.IF(emit_bool_to_cond_code(ir->condition)); + } + + visit_instructions(&ir->then_instructions); + + if (!ir->else_instructions.is_empty()) { + bld.set_base_ir(ir->condition); + bld.emit(BRW_OPCODE_ELSE); + + visit_instructions(&ir->else_instructions); + } + + bld.set_base_ir(ir->condition); + bld.emit(BRW_OPCODE_ENDIF); + + self().try_replace_with_sel(); + } + + void + visit(ir_loop *ir) + { + if (brw->gen < 6) + self().no16("Can't support (non-uniform) control flow on SIMD16\n"); + + /* We don't want debugging output to print the whole body of the + * loop as the annotation. + */ + bld.set_base_ir(NULL); + bld.emit(BRW_OPCODE_DO); + + visit_instructions(&ir->body_instructions); + + bld.set_base_ir(NULL); + bld.emit(BRW_OPCODE_WHILE); + } + + void + visit(ir_loop_jump *ir) + { + switch (ir->mode) { + case ir_loop_jump::jump_break: + bld.emit(BRW_OPCODE_BREAK); + break; + case ir_loop_jump::jump_continue: + bld.emit(BRW_OPCODE_CONTINUE); + break; + } + } + + src_reg + get_timestamp() + { + assert(brw->gen >= 7); + dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD); + + /* The caller wants the low 32 bits of the timestamp. Since it's running + * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds, + * which is plenty of time for our purposes. It is identical across the + * EUs, but since it's tracking GPU core speed it will increment at a + * varying rate as render P-states change. + * + * The caller could also check if render P-states have changed (or anything + * else that might disrupt timing) by reading back subregister 2 and + * checking if that field is != 0. + */ + exec_all(bld.MOV(dst, brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_TIMESTAMP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_4, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW))); + + return src_reg(dst); + } + + void + emit_shader_time_begin() + { + bld.set_annotation("shader time start"); + shader_start_time = get_timestamp(); + } + + void + emit_shader_time_end() + { + B ubld = bld.force_uncompressed(); + + bld.set_annotation("shader time end"); + src_reg shader_end_time = get_timestamp(); + + /* Check that there weren't any timestamp reset events (assuming these + * were the only two timestamp reads that happened). + */ + src_reg reset_end = component(shader_end_time, 3); + + exec_condmod(BRW_CONDITIONAL_Z, + bld.AND(bld.reg_null_d(), reset_end, src_reg(1u))); + bld.IF(BRW_PREDICATE_NORMAL); + + /* Take the current timestamp and get the delta. */ + dst_reg diff = bld.scalar_reg(BRW_REGISTER_TYPE_UD); + ubld.ADD(diff, component(negate(shader_start_time), 0), + component(shader_end_time, 0)); + + /* If there were no instructions between the two timestamp gets, the diff + * is 2 cycles. 
Remove that overhead, so I can forget about that when + * trying to determine the time taken for single instructions. + */ + ubld.ADD(diff, src_reg(diff), src_reg(-2u)); + + emit_shader_time_write(st_type, src_reg(diff)); + emit_shader_time_write(st_type + ST_WRITTEN, src_reg(1u)); + bld.emit(BRW_OPCODE_ELSE); + emit_shader_time_write(st_type + ST_RESET, src_reg(1u)); + bld.emit(BRW_OPCODE_ENDIF); + } + + void + emit_shader_time_write(int type, const src_reg &value) + { + B ubld = bld.force_uncompressed(); + const int shader_time_index = + brw_get_shader_time_index(brw, shader_prog, prog, + (enum shader_time_shader_type)type); + const dst_reg payload = bld.natural_reg(BRW_REGISTER_TYPE_UD, 2); + + ubld.MOV(payload, src_reg(shader_time_index * SHADER_TIME_STRIDE)); + ubld.MOV(offset(payload, 1), value); + ubld.emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(payload)); + } + + void + emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, + const dst_reg &dst, const src_reg &addr, + const src_reg &src0, const src_reg &src1) + { + const dst_reg payload = half(bld.natural_reg(BRW_REGISTER_TYPE_UD, 4), 0); + src_reg srcs[4]; + unsigned h, n = 0; + + /* Initialize the message header if necessary. */ + srcs[n] = self().emit_untyped_surface_header(); + n += h = (srcs[n].file == BAD_FILE ? 0 : 1); + + /* Set the atomic operation offset. */ + srcs[n] = src_reg(bld.natural_reg(BRW_REGISTER_TYPE_UD)); + bld.MOV(dst_reg(srcs[n++]), addr); + + /* Set the atomic operation arguments. */ + if (src0.file != BAD_FILE) { + srcs[n] = src_reg(bld.natural_reg(BRW_REGISTER_TYPE_UD)); + bld.MOV(dst_reg(srcs[n++]), src0); + } + + if (src1.file != BAD_FILE) { + srcs[n] = src_reg(bld.natural_reg(BRW_REGISTER_TYPE_UD)); + bld.MOV(dst_reg(srcs[n++]), src1); + } + + /* Emit the instruction. Note that this maps to the normal + * SIMD8 untyped atomic message on Ivy Bridge when we are doing + * SIMD4x2, but that's OK because unused channels will be masked + * out. + */ + bld.LOAD_PAYLOAD(payload, srcs, n); + bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_reg(payload), + src_reg(atomic_op), src_reg(surf_index)) + ->mlen = (n - h) * bld.dispatch_width() / 8 + h; + } + + void + emit_untyped_surface_read(unsigned surf_index, const dst_reg &dst, + const src_reg &addr) + { + const dst_reg payload = half(bld.natural_reg(BRW_REGISTER_TYPE_UD, 2), 0); + src_reg srcs[2]; + unsigned h, n = 0; + + /* Initialize the message header if necessary. */ + srcs[n] = self().emit_untyped_surface_header(); + n += h = (srcs[n].file == BAD_FILE ? 0 : 1); + + /* Set the surface read offset. */ + srcs[n] = src_reg(bld.natural_reg(BRW_REGISTER_TYPE_UD)); + bld.MOV(dst_reg(srcs[n++]), addr); + + /* Emit the instruction. Note that this maps to the normal + * SIMD8 untyped atomic message on Ivy Bridge when we are doing + * SIMD4x2, but that's OK because unused channels will be masked + * out. 
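+ *
+ * E.g. (a sketch; sizes depend on the dispatch mode) a headerless SIMD8
+ * read sends one payload GRF of per-channel offsets, so the mlen
+ * computed below works out to 0 + 8 / 8 = 1.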
+ */ + bld.LOAD_PAYLOAD(payload, srcs, n); + bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, + src_reg(payload), src_reg(surf_index)) + ->mlen = h + bld.dispatch_width() / 8; + } + + void + visit_atomic_counter_intrinsic(ir_call *ir) + { + ir_dereference *deref = static_cast<ir_dereference *>( + ir->actual_parameters.get_head()); + ir_variable *location = deref->variable_referenced(); + unsigned surf_index = (stage_prog_data->binding_table.abo_start + + location->data.binding); + + /* Calculate the surface offset */ + src_reg offset(bld.scalar_reg(BRW_REGISTER_TYPE_UD)); + ir_dereference_array *deref_array = deref->as_dereference_array(); + + if (deref_array) { + src_reg tmp(bld.scalar_reg(BRW_REGISTER_TYPE_UD)); + bld.MUL(dst_reg(tmp), visit_result(deref_array->array_index), + src_reg(ATOMIC_COUNTER_SIZE)); + bld.ADD(dst_reg(offset), tmp, src_reg(location->data.atomic.offset)); + } else { + offset = src_reg(location->data.atomic.offset); + } + + /* Emit the appropriate machine instruction */ + const char *callee = ir->callee->function_name(); + dst_reg dst(visit_result(ir->return_deref)); + + if (!strcmp("__intrinsic_atomic_read", callee)) { + emit_untyped_surface_read(surf_index, dst, offset); + + } else if (!strcmp("__intrinsic_atomic_increment", callee)) { + emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset, + src_reg(), src_reg()); + + } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { + emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset, + src_reg(), src_reg()); + } + } + + void + visit(ir_call *ir) + { + const char *callee = ir->callee->function_name(); + + if (!strcmp("__intrinsic_atomic_read", callee) || + !strcmp("__intrinsic_atomic_increment", callee) || + !strcmp("__intrinsic_atomic_predecrement", callee)) { + visit_atomic_counter_intrinsic(ir); + } else { + unreachable("Unsupported intrinsic."); + } + } + + void + visit(ir_return *) + { + unreachable("FINISHME"); + } + + void + visit(ir_function_signature *) + { + unreachable("not reached"); + } + + void + visit(ir_function *ir) + { + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(NULL, &empty, false); + assert(sig); + + visit_instructions(&sig->body); + } + } + + bool + try_emit_mad(ir_expression *ir) + { + /* 3-src instructions were introduced in gen6. */ + if (brw->gen < 6) + return false; + + /* MAD can only handle floating-point data. */ + if (ir->type->base_type != GLSL_TYPE_FLOAT) + return false; + + ir_rvalue *nonmul = ir->operands[1]; + ir_expression *mul = ir->operands[0]->as_expression(); + + if (!mul || mul->operation != ir_binop_mul) { + nonmul = ir->operands[0]; + mul = ir->operands[1]->as_expression(); + + if (!mul || mul->operation != ir_binop_mul) + return false; + } + + if (nonmul->as_constant() || + mul->operands[0]->as_constant() || + mul->operands[1]->as_constant()) + return false; + + dst_reg result = self().temporary_reg(ir->type); + bld.MAD(result, bld.fix_3src_operand(visit_result(nonmul)), + bld.fix_3src_operand(visit_result(mul->operands[0])), + bld.fix_3src_operand(visit_result(mul->operands[1]))); + + this->result = src_reg(result); + return true; + } + + /** + * Possibly returns an instruction that set up @param reg. 
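+ * (This is what lets try_emit_saturate() and try_rewrite_rhs_to_dst()
+ * fold a saturate or an assignment destination into the instruction
+ * that produced the value.)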
+ *
+ * Sometimes we want to take the result of some expression/variable
+ * dereference tree and rewrite the instruction generating the result
+ * of the tree.  When processing the tree, we know that the
+ * instructions generated are all writing temporaries that are dead
+ * outside of this tree.  So, if we have some instructions that write
+ * a temporary, we're free to point that temp write somewhere else.
+ *
+ * Note that this doesn't guarantee that the instruction generated
+ * only reg -- it might be the size=4 destination of a texture instruction.
+ */
+   instruction *
+   get_instruction_generating_reg(instruction *start,
+                                  instruction *end,
+                                  const src_vector &reg)
+   {
+      if (end == start ||
+          (end->predicate && end->opcode != BRW_OPCODE_SEL) ||
+          reg.reladdr || reg.abs || reg.negate ||
+          !is_identity_swizzle(get_writemask(end), reg.swizzle) ||
+          !storage(reg).equals(src_reg(end->dst))) {
+         return NULL;
+      } else {
+         return end;
+      }
+   }
+
+   bool
+   try_emit_saturate(ir_expression *ir)
+   {
+      instruction *pre_inst = (instruction *)this->instructions.get_tail();
+      src_reg src = visit_result(ir->operands[0]);
+      instruction *last_inst = (instruction *)this->instructions.get_tail();
+
+      /* If the last instruction from our accept() generated our
+       * src, just set the saturate flag instead of emitting a separate mov.
+       */
+      instruction *modify = get_instruction_generating_reg(
+         pre_inst, last_inst, src);
+
+      if (modify && modify->can_do_saturate() &&
+          get_writemask(modify) == (1u << ir->type->vector_elements) - 1) {
+         modify->saturate = true;
+         this->result = src;
+         return true;
+      }
+
+      return false;
+   }
+
+   bool
+   try_emit_b2f_of_compare(ir_expression *ir)
+   {
+      /* This optimization relies on CMP setting the destination to 0 when
+       * false.  Early hardware only sets the least significant bit, and
+       * leaves the other bits undefined.  So we can't use it.
+       */
+      if (brw->gen < 6)
+         return false;
+
+      ir_expression *const cmp = ir->operands[0]->as_expression();
+      if (cmp == NULL ||
+          !(cmp->operation == ir_binop_less ||
+            cmp->operation == ir_binop_greater ||
+            cmp->operation == ir_binop_lequal ||
+            cmp->operation == ir_binop_gequal ||
+            cmp->operation == ir_binop_equal ||
+            cmp->operation == ir_binop_nequal))
+         return false;
+
+      const src_reg src0 = visit_result(cmp->operands[0]);
+      const src_reg src1 = visit_result(cmp->operands[1]);
+
+      this->result = src_reg(self().temporary_reg(ir->type));
+      bld.CMP(dst_reg(this->result), src0, src1,
+              brw_conditional_for_comparison(cmp->operation));
+
+      /* If the comparison is false, this->result will just happen to be zero.
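+ *
+ * That is, for b2f(a < b) the emitted sequence is roughly (register
+ * names hypothetical)
+ *
+ *    CMP.l.f0.0  result, a, b
+ *    (-f0.0) SEL result, result, 1.0f
+ *
+ * so true lanes end up holding 1.0f and false lanes keep the 0 that
+ * CMP wrote.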
+ */ + exec_predicate_inv(BRW_PREDICATE_NORMAL, true, + bld.emit(BRW_OPCODE_SEL, dst_reg(this->result), + this->result, src_reg(1.0f))); + return true; + } + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + void + emit_dp(const dst_reg &dst, const src_reg &src0, const src_reg &src1, + unsigned elements) + { + static enum opcode dot_opcodes[] = { + BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 + }; + + bld.emit(dot_opcodes[elements - 2], dst, src0, src1); + } + + void + visit(ir_expression *ir) + { + unsigned int operand; + src_reg op[Elements(ir->operands)]; + dst_reg temp; + bool is_scalar = true; + + /* Deal with the real oddball stuff first */ + switch (ir->operation) { + case ir_binop_add: + if (try_emit_mad(ir)) + return; + break; + case ir_unop_b2f: + if (try_emit_b2f_of_compare(ir)) + return; + break; + case ir_unop_saturate: + if (try_emit_saturate(ir)) + return; + break; + case ir_unop_interpolate_at_centroid: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: + self().emit_interpolate_expression(ir); + return; + default: + break; + } + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + is_scalar &= ir->operands[operand]->type->is_scalar(); + op[operand] = visit_result(ir->operands[operand]); + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + /* Storage for our result. If our result goes into an assignment, it + * will just get copy-propagated out, so no worries. + */ + dst_reg result_dst = self().temporary_reg(ir->type); + this->result = src_reg(result_dst); + + switch (ir->operation) { + case ir_unop_logic_not: + if (ctx->Const.UniformBooleanTrue != 1) { + bld.NOT(result_dst, op[0]); + } else { + bld.XOR(result_dst, op[0], src_reg(1)); + } + break; + case ir_unop_neg: + op[0].negate = !op[0].negate; + bld.MOV(result_dst, op[0]); + break; + case ir_unop_abs: + op[0].abs = true; + op[0].negate = false; + bld.MOV(result_dst, op[0]); + break; + case ir_unop_sign: + if (ir->type->is_float()) { + /* AND(val, 0x80000000) gives the sign bit. + * + * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not + * zero. + */ + bld.CMP(bld.reg_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ); + + op[0].type = BRW_REGISTER_TYPE_UD; + result_dst.type = BRW_REGISTER_TYPE_UD; + bld.AND(result_dst, op[0], src_reg(0x80000000u)); + + exec_predicate(BRW_PREDICATE_NORMAL, + bld.OR(result_dst, src_reg(result_dst), + src_reg(0x3f800000u))); + this->result.type = BRW_REGISTER_TYPE_F; + } else { + /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1). + * -> non-negative val generates 0x00000000. + * Predicated OR sets 1 if val is positive. 
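+ *
+ * E.g. sign(-3): the flag is clear and ASR yields -1, so the OR is
+ * skipped; sign(5): ASR yields 0 and the predicated OR turns it into 1;
+ * sign(0) keeps the 0 from ASR.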
+ */
+         bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G);
+
+         bld.ASR(result_dst, op[0], src_reg(31));
+
+         exec_predicate(BRW_PREDICATE_NORMAL,
+                        bld.OR(result_dst, this->result, src_reg(1)));
+      }
+      break;
+   case ir_unop_rcp:
+      bld.emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
+      break;
+   case ir_unop_exp2:
+      bld.emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
+      break;
+   case ir_unop_log2:
+      bld.emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
+      break;
+   case ir_unop_exp:
+   case ir_unop_log:
+      unreachable("not reached: should be handled by ir_explog_to_explog2");
+
+   case ir_unop_sin:
+   case ir_unop_sin_reduced:
+      bld.emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos:
+   case ir_unop_cos_reduced:
+      bld.emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
+      break;
+   case ir_unop_dFdx:
+      bld.emit(FS_OPCODE_DDX, result_dst, op[0], src_reg(BRW_DERIVATIVE_BY_HINT));
+      break;
+   case ir_unop_dFdx_coarse:
+      bld.emit(FS_OPCODE_DDX, result_dst, op[0], src_reg(BRW_DERIVATIVE_COARSE));
+      break;
+   case ir_unop_dFdx_fine:
+      bld.emit(FS_OPCODE_DDX, result_dst, op[0], src_reg(BRW_DERIVATIVE_FINE));
+      break;
+   case ir_unop_dFdy:
+      bld.emit(FS_OPCODE_DDY, result_dst, op[0], src_reg(BRW_DERIVATIVE_BY_HINT));
+      break;
+   case ir_unop_dFdy_coarse:
+      bld.emit(FS_OPCODE_DDY, result_dst, op[0], src_reg(BRW_DERIVATIVE_COARSE));
+      break;
+   case ir_unop_dFdy_fine:
+      bld.emit(FS_OPCODE_DDY, result_dst, op[0], src_reg(BRW_DERIVATIVE_FINE));
+      break;
+   case ir_binop_add:
+      bld.ADD(result_dst, op[0], op[1]);
+      break;
+   case ir_binop_sub:
+      unreachable("not reached: should be handled by ir_sub_to_add_neg");
+   case ir_binop_mul:
+      if (brw->gen < 8 && ir->type->is_integer()) {
+         /* For integer multiplication, the MUL uses the low 16 bits
+          * of one of the operands (src0 on gen6, src1 on gen7).  The
+          * MACH accumulates in the contribution of the upper 16 bits
+          * of that operand.
+          */
+         if (ir->operands[0]->is_uint16_constant()) {
+            if (brw->gen < 7)
+               bld.MUL(result_dst, op[0], op[1]);
+            else
+               bld.MUL(result_dst, op[1], op[0]);
+         } else if (ir->operands[1]->is_uint16_constant()) {
+            if (brw->gen < 7)
+               bld.MUL(result_dst, op[1], op[0]);
+            else
+               bld.MUL(result_dst, op[0], op[1]);
+         } else {
+            if (brw->gen >= 7)
+               self().no16("SIMD16 explicit accumulator operands unsupported\n");
+
+            dst_reg acc(retype(brw_acc_reg(bld.dispatch_width()),
+                               this->result.type));
+
+            bld.MUL(acc, op[0], op[1]);
+            bld.MACH(bld.reg_null_d(), op[0], op[1]);
+            bld.MOV(result_dst, src_reg(acc));
+         }
+      } else {
+         bld.MUL(result_dst, op[0], op[1]);
+      }
+      break;
+   case ir_binop_imul_high: {
+      if (brw->gen == 7)
+         self().no16("SIMD16 explicit accumulator operands unsupported\n");
+
+      dst_reg acc(retype(brw_acc_reg(bld.dispatch_width()),
+                         this->result.type));
+
+      instruction *mul = bld.MUL(acc, op[0], op[1]);
+      bld.MACH(result_dst, op[0], op[1]);
+
+      /* Until Gen8, integer multiplies read 32 bits from one source and
+       * 16 bits from the other, relying on the MACH instruction to
+       * generate the high bits of the result.
+       *
+       * On Gen8, the multiply instruction does a full 32x32-bit multiply,
+       * but in order to do a 32x32-bit multiply that returns the high 32
+       * bits we have to simulate the previous behavior and then use a
+       * MACH instruction.
+       *
+       * FINISHME: Don't use source modifiers on src1.
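+ *
+ * (Retyping src1 from D/UD to W/UW below makes the Gen8 MUL read only
+ * the low 16 bits of that operand, mimicking the pre-Gen8 behavior
+ * that the MACH sequence expects.)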
+ */ + if (brw->gen >= 8) { + assert(mul->src[1].type == BRW_REGISTER_TYPE_D || + mul->src[1].type == BRW_REGISTER_TYPE_UD); + if (mul->src[1].type == BRW_REGISTER_TYPE_D) { + mul->src[1].type = BRW_REGISTER_TYPE_W; + } else { + mul->src[1].type = BRW_REGISTER_TYPE_UW; + } + } + break; + } + case ir_binop_div: + /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ + assert(ir->type->is_integer()); + bld.emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]); + break; + case ir_binop_carry: { + if (brw->gen == 7) + self().no16("SIMD16 explicit accumulator operands unsupported\n"); + + src_reg acc(retype(brw_acc_reg(bld.dispatch_width()), + BRW_REGISTER_TYPE_UD)); + + bld.ADDC(bld.reg_null_ud(), op[0], op[1]); + bld.MOV(result_dst, acc); + break; + } + case ir_binop_borrow: { + if (brw->gen == 7) + self().no16("SIMD16 explicit accumulator operands unsupported\n"); + + src_reg acc(retype(brw_acc_reg(bld.dispatch_width()), + BRW_REGISTER_TYPE_UD)); + + bld.SUBB(bld.reg_null_ud(), op[0], op[1]); + bld.MOV(result_dst, acc); + break; + } + case ir_binop_mod: + /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ + assert(ir->type->is_integer()); + bld.emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]); + break; + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: + case ir_binop_all_equal: + case ir_binop_any_nequal: + if (ctx->Const.UniformBooleanTrue == 1) { + resolve_bool_comparison(ir->operands[0], &op[0]); + resolve_bool_comparison(ir->operands[1], &op[1]); + } + + if (!is_scalar && ir->type->is_scalar()) { + bld.CMP(bld.reg_null_d(), op[0], op[1], + brw_conditional_for_comparison(ir->operation)); + bld.MOV(result_dst, src_reg(0)); + exec_predicate((ir->operation == ir_binop_all_equal ? 
+ BRW_PREDICATE_ALIGN16_ALL4H : + BRW_PREDICATE_ALIGN16_ANY4H), + bld.MOV(result_dst, + src_reg(ctx->Const.UniformBooleanTrue))); + } else { + bld.CMP(result_dst, op[0], op[1], + brw_conditional_for_comparison(ir->operation)); + } + break; + case ir_unop_any: + bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ); + bld.MOV(result_dst, src_reg(0)); + exec_predicate(BRW_PREDICATE_ALIGN16_ANY4H, + bld.MOV(result_dst, + src_reg(ctx->Const.UniformBooleanTrue))); + break; + case ir_binop_logic_xor: + bld.XOR(result_dst, op[0], op[1]); + break; + case ir_binop_logic_or: + bld.OR(result_dst, op[0], op[1]); + break; + case ir_binop_logic_and: + bld.AND(result_dst, op[0], op[1]); + break; + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); + break; + case ir_unop_noise: + unreachable("not reached: should be handled by lower_noise"); + + case ir_quadop_vector: + unreachable("not reached: should be handled by lower_quadop_vector"); + + case ir_binop_vector_extract: + unreachable("not reached: should be handled by lower_vec_index_to_cond_assign()"); + + case ir_triop_vector_insert: + unreachable("not reached: should be handled by lower_vector_insert()"); + + case ir_binop_ldexp: + unreachable("not reached: should be handled by ldexp_to_arith()"); + + case ir_unop_sqrt: + bld.emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); + break; + case ir_unop_rsq: + bld.emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_bitcast_i2f: + case ir_unop_bitcast_u2f: + op[0].type = BRW_REGISTER_TYPE_F; + this->result = op[0]; + break; + case ir_unop_i2u: + case ir_unop_bitcast_f2u: + op[0].type = BRW_REGISTER_TYPE_UD; + this->result = op[0]; + break; + case ir_unop_u2i: + case ir_unop_bitcast_f2i: + op[0].type = BRW_REGISTER_TYPE_D; + this->result = op[0]; + break; + case ir_unop_i2f: + case ir_unop_u2f: + case ir_unop_f2i: + case ir_unop_f2u: + bld.MOV(result_dst, op[0]); + break; + case ir_unop_b2i: + bld.AND(result_dst, op[0], src_reg(1)); + break; + case ir_unop_b2f: + if (ctx->Const.UniformBooleanTrue != 1) { + op[0].type = BRW_REGISTER_TYPE_UD; + result_dst.type = BRW_REGISTER_TYPE_UD; + bld.AND(result_dst, op[0], src_reg(0x3f800000u)); + this->result.type = BRW_REGISTER_TYPE_F; + } else { + temp = self().temporary_reg(ir->operands[0]->type); + bld.AND(temp, op[0], src_reg(1)); + bld.MOV(result_dst, src_reg(temp)); + } + break; + case ir_unop_f2b: + bld.CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ); + break; + case ir_unop_i2b: + bld.CMP(result_dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ); + break; + case ir_unop_trunc: + bld.RNDZ(result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = !op[0].negate; + bld.RNDD(result_dst, op[0]); + this->result.negate = true; + break; + case ir_unop_floor: + bld.RNDD(result_dst, op[0]); + break; + case ir_unop_fract: + bld.FRC(result_dst, op[0]); + break; + case ir_unop_round_even: + bld.RNDE(result_dst, op[0]); + break; + case ir_binop_min: + case ir_binop_max: + bld.emit_minmax(ir->operation == ir_binop_min ? 
+ BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE, + result_dst, op[0], op[1]); + break; + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_unorm_4x8: + unreachable("not reached: should be handled by lower_packing_builtins"); + case ir_unop_pack_half_2x16: + self().emit_pack_half_2x16(result_dst, op[0]); + break; + case ir_unop_unpack_half_2x16: + self().emit_unpack_half_2x16(result_dst, op[0]); + break; + case ir_unop_unpack_half_2x16_split_x: + bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result_dst, op[0]); + break; + case ir_unop_unpack_half_2x16_split_y: + bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result_dst, op[0]); + break; + case ir_binop_pow: + bld.emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); + break; + case ir_unop_bitfield_reverse: + bld.BFREV(result_dst, op[0]); + break; + case ir_unop_bit_count: + bld.CBIT(result_dst, op[0]); + break; + case ir_unop_find_msb: + temp = retype(self().temporary_reg(ir->type), + BRW_REGISTER_TYPE_UD); + bld.FBH(temp, op[0]); + + /* FBH counts from the MSB side, while GLSL's findMSB() wants the count + * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then + * subtract the result from 31 to convert the MSB count into an LSB count. + */ + + /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */ + bld.MOV(result_dst, src_reg(temp)); + bld.CMP(bld.reg_null_d(), this->result, src_reg(-1), BRW_CONDITIONAL_NZ); + exec_predicate(BRW_PREDICATE_NORMAL, + bld.ADD(result_dst, negate(src_reg(temp)), + src_reg(31))); + break; + case ir_unop_find_lsb: + bld.FBL(result_dst, op[0]); + break; + case ir_unop_saturate: + bld.MOV(result_dst, op[0]) + ->saturate = true; + break; + case ir_triop_bitfield_extract: + /* Note that the instruction's argument order is reversed from GLSL + * and the IR. + */ + bld.BFE(result_dst, bld.fix_3src_operand(op[2]), + bld.fix_3src_operand(op[1]), + bld.fix_3src_operand(op[0])); + break; + case ir_binop_bfm: + bld.BFI1(result_dst, op[0], op[1]); + break; + case ir_triop_bfi: + bld.BFI2(result_dst, bld.fix_3src_operand(op[0]), + bld.fix_3src_operand(op[1]), + bld.fix_3src_operand(op[2])); + break; + case ir_quadop_bitfield_insert: + unreachable("not reached: should be handled by " + "lower_instructions::bitfield_insert_to_bfm_bfi"); + + case ir_unop_bit_not: + bld.NOT(result_dst, op[0]); + break; + case ir_binop_bit_and: + bld.AND(result_dst, op[0], op[1]); + break; + case ir_binop_bit_xor: + bld.XOR(result_dst, op[0], op[1]); + break; + case ir_binop_bit_or: + bld.OR(result_dst, op[0], op[1]); + break; + case ir_binop_lshift: + bld.SHL(result_dst, op[0], op[1]); + break; + case ir_binop_rshift: + if (ir->type->base_type == GLSL_TYPE_INT) + bld.ASR(result_dst, op[0], op[1]); + else + bld.SHR(result_dst, op[0], op[1]); + break; + case ir_binop_pack_half_2x16_split: + bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result_dst, op[0], op[1]); + break; + case ir_binop_ubo_load: { + /* This IR node takes a constant uniform block and a constant or + * variable byte offset within the block and loads a vector from that. + */ + ir_constant *const_uniform_block = ir->operands[0]->as_constant(); + ir_constant *const_offset = ir->operands[1]->as_constant(); + src_reg surf_index; + + if (const_uniform_block) { + /* The block index is a constant, so just emit the binding table entry + * as an immediate. 
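+ * (E.g., hypothetically, a constant block index of 2 with
+ * ubo_start == 8 simply becomes the immediate surface index 10.)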
+ */ + surf_index = src_reg(stage_prog_data->binding_table.ubo_start + + const_uniform_block->value.u[0]); + } else { + /* The block index is not a constant. Evaluate the index expression + * per-channel and add the base UBO index; the generator will select + * a value from any live channel. + */ + surf_index = src_reg(bld.scalar_reg(BRW_REGISTER_TYPE_UD)); + exec_all(bld.ADD(dst_reg(surf_index), op[0], + src_reg(stage_prog_data->binding_table.ubo_start))); + + /* Assume this may touch any UBO. It would be nice to provide + * a tighter bound, but the array information is already lowered away. + */ + brw_mark_surface_used(stage_prog_data, + stage_prog_data->binding_table.ubo_start + + shader_prog->NumUniformBlocks - 1); + } + + if (const_offset) { + self().emit_pull_constant_load(bld, result_dst, surf_index, + const_offset->value.u[0], NULL, + ir->type->vector_elements); + } else { + src_reg reladdr(bld.scalar_reg(BRW_REGISTER_TYPE_D)); + + /* Turn the byte offset into alloc_size units. */ + bld.SHR(dst_reg(reladdr), op[1], src_reg(alloc_size == 4 ? 4 : 2)); + + self().emit_pull_constant_load(bld, result_dst, surf_index, 0, + &reladdr, ir->type->vector_elements); + } + + if (ir->type->base_type == GLSL_TYPE_BOOL) { + for (unsigned i = 0; i < CEILING(ir->type->vector_elements, + alloc_size); i++) { + /* UBO bools are any nonzero value. We consider bools to be + * values with the low bit set to 1. Convert them using CMP. + */ + bld.CMP(offset(result_dst, i), offset(result, i), + src_reg(0u), BRW_CONDITIONAL_NZ); + } + } + break; + } + case ir_triop_fma: + /* Note that the instruction's argument order is reversed from GLSL + * and the IR. + */ + bld.MAD(result_dst, bld.fix_3src_operand(op[2]), + bld.fix_3src_operand(op[1]), + bld.fix_3src_operand(op[0])); + break; + case ir_triop_lrp: + bld.LRP(result_dst, op[0], op[1], op[2]); + break; + case ir_triop_csel: + bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ); + exec_predicate(BRW_PREDICATE_NORMAL, + bld.emit(BRW_OPCODE_SEL, result_dst, op[1], op[2])); + break; + case ir_unop_interpolate_at_centroid: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: + unreachable("already handled above"); + break; + } + } + + void + visit(ir_swizzle *ir) + { + const unsigned swz = compose_swizzle( + BRW_SWIZZLE4(ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w), + swizzle_for_size(ir->type->vector_elements)); + dst_vector dst = dst_vector_n(self().temporary_reg(ir->type), + ir->type->vector_elements); + src_vector src = swizzle(src_vector_n(visit_result(ir->val), 4), swz); + + if (reg_traits<src_reg>::allows_swizzle) { + this->result = storage(src); + } else { + bld.vector().MOV(dst, src); + this->result = src_reg(storage(dst)); + } + } + + unsigned + emit_assignment_writes(const dst_vector &l, const src_vector &r, + const glsl_type *type, enum brw_predicate predicate) + { + unsigned size = 0; + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: { + typename B::vector_builder vbld = bld.vector(); + + for (int j = 0; j < type->matrix_columns; j++) { + dst_vector dst = retype(offset(l, size), + brw_type_for_base_type(type)); + src_vector src = retype(offset(r, size), + brw_type_for_base_type(type)); + + exec_predicate(predicate, + vbld.MOV(resize(dst, type->vector_elements), src)); + + size += CEILING(type->vector_elements, alloc_size); + } + break; + } + case GLSL_TYPE_ARRAY: + for (unsigned i = 0; i < type->length; i++) + size += 
emit_assignment_writes(offset(l, size), + offset(r, size), + type->fields.array, + predicate); + break; + + case GLSL_TYPE_STRUCT: + for (unsigned i = 0; i < type->length; i++) + size += emit_assignment_writes(offset(l, size), + offset(r, size), + type->fields.structure[i].type, + predicate); + break; + + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + break; + + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + unreachable("not reached"); + } + + return size; + } + + /* If the RHS processing resulted in an instruction generating a + * temporary value, and it would be easy to rewrite the instruction to + * generate its result right into the LHS instead, do so. This ends + * up reliably removing instructions where it can be tricky to do so + * later without real UD chain information. + */ + bool + try_rewrite_rhs_to_dst(ir_assignment *ir, + const dst_vector &dst, const src_vector &src, + instruction *pre_rhs_inst, + instruction *last_rhs_inst) + { + /* Only attempt if we're doing a direct assignment. */ + if (ir->condition || + !(ir->lhs->type->is_scalar() || + (ir->lhs->type->is_vector()))) + return false; + + /* Make sure the last instruction generated our source reg. */ + instruction *modify = get_instruction_generating_reg( + pre_rhs_inst, last_rhs_inst, src); + if (!modify) + return false; + + /* If last_rhs_inst wrote a different number of components than our LHS, + * we can't safely rewrite it. + */ + if ((dst.writemask & ~get_writemask(modify)) || + ((~dst.writemask & get_writemask(modify)) && + !reg_traits<dst_reg>::allows_writemask)) + return false; + + /* Success! Rewrite the instruction. */ + modify->dst = storage(dst); + return true; + } + + void + visit(ir_assignment *ir) + { + const unsigned mask = (ir->lhs->type->is_vector() ? ir->write_mask : + ir->lhs->type->is_scalar() ? 0x1 : 0xf); + dst_vector l = writemask(dst_vector_n(visit_result(ir->lhs), 4), mask); + instruction *pre_rhs_inst = (instruction *)this->instructions.get_tail(); + src_vector r = swizzle(src_vector_n(visit_result(ir->rhs), 4), + from_glsl_swizzle(mask, SWIZZLE_XYZW)); + instruction *last_rhs_inst = (instruction *)this->instructions.get_tail(); + enum brw_predicate predicate = BRW_PREDICATE_NONE; + + if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) + return; + + if (ir->condition) + predicate = emit_bool_to_cond_code(ir->condition); + + emit_assignment_writes(l, r, ir->lhs->type, predicate); + } + + /* Sample from the MCS surface attached to this multisample texture. */ + src_reg + emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler) + { + typename B::vector_builder vbld = bld.vector(); + const unsigned reg_width = bld.dispatch_width() / 8; + const unsigned length = ir->coordinate->type->vector_elements; + const unsigned coord_mask = (1 << length) - 1; + const unsigned zero_mask = ((1 << alloc_size) - 1) & ~coord_mask; + dst_vector payload = vbld.natural_reg(brw_type_for_base_type( + ir->coordinate->type)); + dst_vector dst = vbld.natural_reg(BRW_REGISTER_TYPE_UD); + + vbld.MOV(writemask(payload, coord_mask), src_vector_n(coordinate, length)); + vbld.MOV(writemask(payload, zero_mask), src_reg(0)); + + instruction *inst = bld.emit(SHADER_OPCODE_TXF_MCS, storage(dst), + src_reg(storage(payload)), sampler); + inst->base_mrf = -1; + inst->mlen = CEILING(length, alloc_size) * reg_width; + /* We only care about one component of response, but the sampler always + * writes 4. 
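+ * E.g., assuming a SIMD16 FS with reg_width == 2 and alloc_size == 1,
+ * regs_written comes out as 8 below even though only one component is
+ * consumed.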
+ */ + inst->regs_written = CEILING(4, alloc_size) * reg_width; + return src_reg(storage(dst)); + } + + /** + * Apply workarounds for Gen6 gather with UINT/SINT + */ + void + emit_gen6_gather_wa(uint8_t wa, dst_reg dst) + { + if (!wa) + return; + + typename B::vector_builder vbld = bld.vector(); + const unsigned width = (wa & WA_8BIT) ? 8 : 16; + dst_vector vdst = dst_vector_n(dst, 4); + dst_vector vdst_f = retype(vdst, BRW_REGISTER_TYPE_F); + + /* Convert from UNORM to UINT */ + vbld.MUL(vdst_f, src_vector(vdst_f), src_reg((float)((1 << width) - 1))); + vbld.MOV(vdst, src_vector(vdst_f)); + + if (wa & WA_SIGN) { + /* Reinterpret the UINT value as a signed INT value by shifting the + * sign bit into place, then shifting back preserving sign. + */ + vbld.SHL(vdst, src_vector(vdst), src_reg(32 - width)); + vbld.ASR(vdst, src_vector(vdst), src_reg(32 - width)); + } + } + + /** + * Swizzle the result of a texture instruction. This is necessary for + * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow + * comparisons. + */ + void + swizzle_result(ir_texture *ir, const src_reg &orig_val, uint32_t sampler) + { + const unsigned swz = self().sampler_prog_key()->swizzles[sampler]; + + if (ir->op == ir_query_levels) { + /* # levels is in .w */ + this->result = component(src_vector_n(orig_val, 4), 3); + return; + } + + this->result = resize(orig_val, ir->type->vector_elements); + + /* txs,lod don't actually sample the texture, so swizzling the result + * makes no sense. + */ + if (ir->op == ir_txs || ir->op == ir_lod || ir->op == ir_tg4) + return; + + if (ir->type == glsl_type::float_type) { + /* Ignore DEPTH_TEXTURE_MODE swizzling. */ + assert(ir->sampler->type->sampler_shadow); + + } else if (swz != SWIZZLE_NOOP) { + typename B::vector_builder vbld = bld.vector(); + dst_vector dst = vbld.natural_reg(orig_val.type); + unsigned zero_mask = 0, one_mask = 0, copy_mask = 0; + + for (int i = 0; i < 4; i++) { + switch (GET_SWZ(swz, i)) { + case SWIZZLE_ZERO: + zero_mask |= (1 << i); + break; + case SWIZZLE_ONE: + one_mask |= (1 << i); + break; + default: + copy_mask |= (1 << i); + break; + } + } + + if (copy_mask) + vbld.MOV(writemask(dst, copy_mask), + swizzle(src_vector_n(orig_val, 4), + from_glsl_swizzle(0xf, swz))); + + if (zero_mask) + vbld.MOV(writemask(dst, zero_mask), src_reg(0.0f)); + + if (one_mask) + vbld.MOV(writemask(dst, one_mask), src_reg(1.0f)); + + this->result = src_reg(storage(dst)); + } + } + + /** + * Set up the gather channel based on the swizzle, for gather4. + */ + uint32_t + gather_channel(ir_texture *ir, uint32_t sampler) + { + const struct brw_sampler_prog_key_data *tex = self().sampler_prog_key(); + ir_constant *chan = ir->lod_info.component->as_constant(); + const unsigned swiz = GET_SWZ(tex->swizzles[sampler], chan->value.i[0]); + + switch (swiz) { + case SWIZZLE_X: return 0; + case SWIZZLE_Y: + /* gather4 sampler is broken for green channel on RG32F -- + * we must ask for blue instead. + */ + return (tex->gather_channel_quirk_mask & (1 << sampler) ? 2 : 1); + case SWIZZLE_Z: return 2; + case SWIZZLE_W: return 3; + default: + unreachable("Not reached"); /* zero, one swizzles handled already */ + } + } + + src_reg + rescale_texcoord(ir_texture *ir, src_reg coordinate, + bool is_rect, uint32_t sampler, int texunit) + { + typename B::vector_builder vbld = bld.vector(); + const struct brw_sampler_prog_key_data *tex = self().sampler_prog_key(); + const unsigned clamp_mask = + ((tex->gl_clamp_mask[0] & (1 << sampler) ? 
1 : 0) << 0) |
+         ((tex->gl_clamp_mask[1] & (1 << sampler) ? 1 : 0) << 1) |
+         ((tex->gl_clamp_mask[2] & (1 << sampler) ? 1 : 0) << 2);
+      src_vector scale;
+
+      /* The 965 requires the EU to do the normalization of GL rectangle
+       * texture coordinates.  We use the program parameter state
+       * tracking to get the scaling factor.
+       */
+      if (is_rect && (brw->gen < 6 || (brw->gen >= 6 && clamp_mask))) {
+         struct gl_program_parameter_list *params = prog->Parameters;
+         int tokens[STATE_LENGTH] = {
+            STATE_INTERNAL,
+            STATE_TEXRECT_SCALE,
+            texunit,
+            0,
+            0
+         };
+
+         self().no16("rectangle scale uniform setup not supported on SIMD16\n");
+         if (bld.dispatch_width() == 16) {
+            return coordinate;
+         }
+
+         GLuint index = _mesa_add_state_reference(params,
+                                                  (gl_state_index *)tokens);
+         /* Try to find existing copies of the texrect scale uniforms. */
+         for (unsigned i = 0; i < uniforms; i++) {
+            if (stage_prog_data->param[alloc_size * i] ==
+                &prog->Parameters->ParameterValues[index][0]) {
+               scale = src_vector_n(src_reg(UNIFORM, i), 2);
+               break;
+            }
+         }
+
+         /* If we didn't already set them up, do so now. */
+         if (storage(scale).file == BAD_FILE) {
+            scale = src_vector_n(src_reg(UNIFORM, uniforms), 2);
+            stage_prog_data->param[alloc_size * uniforms] =
+               &prog->Parameters->ParameterValues[index][0];
+            stage_prog_data->param[alloc_size * uniforms + 1] =
+               &prog->Parameters->ParameterValues[index][1];
+            uniform_vector_size[uniforms] = 2;
+            uniforms += CEILING(2, alloc_size);
+         }
+      }
+
+      if (brw->gen >= 6 && is_rect) {
+         /* On gen6+, the sampler handles the rectangle coordinates
+          * natively, without needing rescaling.  But that means we have
+          * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
+          * not [0, 1] like the default case below.
+          */
+         dst_vector dst = dst_vector_n(coordinate, 4);
+         exec_condmod(BRW_CONDITIONAL_G,
+                      vbld.emit(BRW_OPCODE_SEL, writemask(dst, clamp_mask),
+                                src_vector(dst), src_reg(0.0f)));
+
+         /* Our parameter comes in as 1.0/width or 1.0/height,
+          * because that's what people normally want for doing
+          * texture rectangle handling.  We need width or height
+          * for clamping, but we don't care enough to make a new
+          * parameter type, so just invert back.
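+ *
+ * (E.g., for a hypothetical 640x480 rectangle texture the parameters
+ * are 1/640 and 1/480, and the RCP below recovers 640 and 480 as the
+ * clamp limits.)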
+ */ + dst_vector limit = vbld.natural_reg(BRW_REGISTER_TYPE_F); + vbld.MOV(limit, scale); + vbld.emit(SHADER_OPCODE_RCP, writemask(limit, clamp_mask), + src_vector(limit)); + + exec_condmod(BRW_CONDITIONAL_L, + vbld.emit(BRW_OPCODE_SEL, writemask(dst, clamp_mask), + src_vector(dst), src_vector(limit))); + } else { + if (is_rect) { + dst_vector dst = vbld.natural_reg(brw_type_for_base_type(ir->type)); + src_vector src = src_vector_n(coordinate, 4); + coordinate = src_reg(storage(dst)); + vbld.MUL(writemask(dst, WRITEMASK_XY), src, scale); + } + + if (ir->coordinate) { + dst_vector dst = dst_vector_n(coordinate, 4); + exec_saturate(true, + vbld.MOV(writemask(dst, clamp_mask), src_vector(dst))); + } + } + + return coordinate; + } + + void + visit(ir_texture *ir) + { + typename B::vector_builder vbld = bld.vector(); + const struct brw_sampler_prog_key_data *tex = self().sampler_prog_key(); + uint32_t sampler = + _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog); + ir_rvalue *nonconst_sampler_index = + _mesa_get_sampler_array_nonconst_index(ir->sampler); + + /* Handle non-constant sampler array indexing */ + src_reg sampler_reg; + if (nonconst_sampler_index) { + /* The highest sampler which may be used by this operation is + * the last element of the array. Mark it here, because the generator + * doesn't have enough information to determine the bound. + */ + uint32_t array_size = ir->sampler->as_dereference_array() + ->array->type->array_size(); + + uint32_t max_used = sampler + array_size - 1; + if (ir->op == ir_tg4 && brw->gen < 8) { + max_used += stage_prog_data->binding_table.gather_texture_start; + } else { + max_used += stage_prog_data->binding_table.texture_start; + } + + brw_mark_surface_used(stage_prog_data, max_used); + + /* Emit code to evaluate the actual indexing expression */ + dst_reg tmp = bld.scalar_reg(BRW_REGISTER_TYPE_UD); + exec_all(bld.ADD(tmp, visit_result(nonconst_sampler_index), + src_reg(sampler))); + sampler_reg = src_reg(tmp); + } else { + /* Single sampler, or constant array index; the indexing expression + * is just an immediate. + */ + sampler_reg = src_reg(sampler); + } + + /* FINISHME: We're failing to recompile our programs when the sampler is + * updated. This only matters for the texture rectangle scale parameters + * (pre-gen6, or gen6+ with GL_CLAMP). + */ + int texunit = prog->SamplerUnits[sampler]; + + if (ir->op == ir_tg4) { + /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother + * emitting anything other than setting up the constant result. + */ + ir_constant *chan = ir->lod_info.component->as_constant(); + int swiz = GET_SWZ(tex->swizzles[sampler], chan->value.i[0]); + if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { + dst_vector res = vbld.natural_reg(BRW_REGISTER_TYPE_F); + vbld.MOV(res, src_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)); + this->result = src_reg(storage(res)); + return; + } + } + + /* Should be lowered by do_lower_texture_projection */ + assert(!ir->projector); + + /* Should be lowered */ + assert(!ir->offset || !ir->offset->type->is_array()); + + /* Generate code to compute all the subexpression trees. This has to be + * done before loading any values into MRFs for the sampler message since + * generating these values may involve SEND messages that need the MRFs. 
+ */ + src_reg coordinate; + if (ir->coordinate) + coordinate = rescale_texcoord(ir, visit_result(ir->coordinate), + ir->sampler->type->sampler_dimensionality == + GLSL_SAMPLER_DIM_RECT, + sampler, texunit); + + src_reg shadow_comparitor; + if (ir->shadow_comparitor) + shadow_comparitor = visit_result(ir->shadow_comparitor); + + src_reg offset_val; + if (ir->offset && !ir->offset->as_constant()) + offset_val = visit_result(ir->offset); + + src_reg lod(0.0f), lod2, sample_index, mcs; + switch (ir->op) { + case ir_tex: + case ir_lod: + case ir_tg4: + case ir_query_levels: + break; + case ir_txb: + lod = visit_result(ir->lod_info.bias); + break; + case ir_txd: + lod = visit_result(ir->lod_info.grad.dPdx); + lod2 = visit_result(ir->lod_info.grad.dPdy); + break; + case ir_txf: + case ir_txl: + case ir_txs: + lod = visit_result(ir->lod_info.lod); + break; + case ir_txf_ms: + sample_index = visit_result(ir->lod_info.sample_index); + + if (brw->gen >= 7 && tex->compressed_multisample_layout_mask & (1<<sampler)) + mcs = emit_mcs_fetch(ir, coordinate, sampler_reg); + else + mcs = src_reg(0u); + break; + default: + unreachable("Unrecognized texture opcode"); + }; + + /* Writemasking doesn't eliminate channels on SIMD8 texture + * samples, so don't worry about them. + */ + dst_reg dst = storage(vbld.natural_reg(brw_type_for_base_type(ir->type))); + instruction *inst = self().emit_texture( + ir, dst, coordinate, shadow_comparitor, + lod, lod2, offset_val, sample_index, mcs, sampler_reg); + + if (ir->offset != NULL && ir->op != ir_txf) + inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant()); + + if (ir->op == ir_tg4) + inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17 + + if (ir->shadow_comparitor) + inst->shadow_compare = true; + + /* fixup #layers for cube map arrays */ + if (ir->op == ir_txs) { + glsl_type const *type = ir->sampler->type; + if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && + type->sampler_array) { + const unsigned components = ir->type->vector_elements; + dst_vector vdst = dst_vector_n(dst, components); + dst_reg depth = bld.scalar_reg(BRW_REGISTER_TYPE_D); + src_reg payload[4]; + + bld.emit_math(SHADER_OPCODE_INT_QUOTIENT, depth, + component(vdst, 2), src_reg(6)); + + for (unsigned i = 0; i < components; ++i) + payload[i] = (i == 2 ? src_reg(depth) : component(vdst, i)); + + vbld.LOAD_VECTOR(vdst, payload); + } + } + + if (brw->gen == 6 && ir->op == ir_tg4) + emit_gen6_gather_wa(tex->gen6_gather_wa[sampler], dst); + + swizzle_result(ir, src_reg(dst), sampler); + } + + struct hash_table *variable_ht; + + typename B::src_reg shader_start_time; + B bld; + + const bool uses_kill; + +public: + int + type_size(const struct glsl_type *type) + { + unsigned int size, i; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns * type_size(type->column_type()); + } else { + return CEILING(type->components(), alloc_size); + } + case GLSL_TYPE_ARRAY: + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up no register space, since they're baked in at + * link time. 
+ */ + return 0; + case GLSL_TYPE_ATOMIC_UINT: + return 0; + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + unreachable("not reached"); + } + + return 0; + } + + /** + * Returns how many MRFs an opcode will write over. + * + * Note that this is not the 0 or 1 implied writes in an actual gen + * instruction -- the generate_* functions generate additional MOVs + * for setup. + */ + int + implied_mrf_writes(instruction *inst) + { + if (inst->mlen == 0 || inst->base_mrf == -1) + return 0; + + switch (inst->opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + return 1 * bld.dispatch_width() / 8; + case SHADER_OPCODE_INT_QUOTIENT: + case SHADER_OPCODE_INT_REMAINDER: + case SHADER_OPCODE_POW: + return 2 * bld.dispatch_width() / 8; + case VS_OPCODE_URB_WRITE: + return 1; + case VS_OPCODE_PULL_CONSTANT_LOAD: + return 2; + case SHADER_OPCODE_GEN4_SCRATCH_READ: + return inst->mlen; + case SHADER_OPCODE_GEN4_SCRATCH_WRITE: + return inst->mlen; + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + return 1; + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: + return inst->mlen; + case GS_OPCODE_URB_WRITE: + case GS_OPCODE_URB_WRITE_ALLOCATE: + case GS_OPCODE_THREAD_END: + return 0; + case GS_OPCODE_FF_SYNC: + return 1; + case SHADER_OPCODE_SHADER_TIME_ADD: + return 0; + case SHADER_OPCODE_TEX: + case SHADER_OPCODE_TXL: + case SHADER_OPCODE_TXD: + case SHADER_OPCODE_TXF: + case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_MCS: + case SHADER_OPCODE_TXS: + case SHADER_OPCODE_TG4: + case SHADER_OPCODE_TG4_OFFSET: + case SHADER_OPCODE_LOD: + case FS_OPCODE_TXB: + return inst->header_present ? 1 : 0; + case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case FS_OPCODE_INTERPOLATE_AT_CENTROID: + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: + return 0; + case FS_OPCODE_FB_WRITE: + return 2; + default: + unreachable("not reached"); + } + } + + static const unsigned alloc_size = reg_traits<src_reg>::alloc_size; + + brw::simple_allocator alloc; + + /* Result of last visit() method. */ + typename B::src_reg result; + + const shader_time_shader_type st_type; +}; + +} /* namespace brw */ + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index b37da4ead62..1eaef45bf68 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -284,10 +284,10 @@ get_written_and_reset(struct brw_context *brw, int i, /* Find where we recorded written and reset. 
*/ int wi, ri; - for (wi = i; brw->shader_time.types[wi] != type + 1; wi++) + for (wi = i; brw->shader_time.types[wi] != type + ST_WRITTEN; wi++) ; - for (ri = i; brw->shader_time.types[ri] != type + 2; ri++) + for (ri = i; brw->shader_time.types[ri] != type + ST_RESET; ri++) ; *written = brw->shader_time.cumulative[wi]; @@ -328,27 +328,18 @@ brw_report_shader_time(struct brw_context *brw) sorted[i] = &scaled[i]; - switch (type) { - case ST_VS_WRITTEN: - case ST_VS_RESET: - case ST_GS_WRITTEN: - case ST_GS_RESET: - case ST_FS8_WRITTEN: - case ST_FS8_RESET: - case ST_FS16_WRITTEN: - case ST_FS16_RESET: + switch (type % ST_NUM_ENTRIES) { + case ST_BASE: + get_written_and_reset(brw, i, &written, &reset); + break; + + case ST_WRITTEN: + case ST_RESET: /* We'll handle these when along with the time. */ scaled[i] = 0; continue; - case ST_VS: - case ST_GS: - case ST_FS8: - case ST_FS16: - get_written_and_reset(brw, i, &written, &reset); - break; - - default: + case ST_SUM: /* I sometimes want to print things that aren't the 3 shader times. * Just print the sum in that case. */ diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h index a8650c3454b..9cd391471da 100644 --- a/src/mesa/drivers/dri/i965/brw_program.h +++ b/src/mesa/drivers/dri/i965/brw_program.h @@ -24,6 +24,8 @@ #ifndef BRW_PROGRAM_H #define BRW_PROGRAM_H +#include "main/mtypes.h" + enum gen6_gather_sampler_wa { WA_SIGN = 1, /* whether we need to sign extend */ WA_8BIT = 2, /* if we have an 8bit format needing wa */ diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 19af0ae09fc..a27a3bad396 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -80,6 +80,7 @@ struct brw_context; #define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) #define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) #define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) +#define BRW_SWIZZLE_XXYY BRW_SWIZZLE4(0,0,1,1) #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) #define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3) #define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3) diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index db94e527ca7..4017b14ddc0 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -395,7 +395,7 @@ schedule_node::set_latency_gen7(bool is_haswell) class instruction_scheduler { public: - instruction_scheduler(backend_visitor *v, int grf_count, + instruction_scheduler(base_visitor *v, int grf_count, instruction_scheduler_mode mode) { this->bv = v; @@ -451,7 +451,7 @@ public: int grf_count; int time; exec_list instructions; - backend_visitor *bv; + base_visitor *bv; instruction_scheduler_mode mode; @@ -1081,12 +1081,14 @@ vec4_instruction_scheduler::calculate_deps() } } - for (int i = 0; i < inst->mlen; i++) { - /* It looks like the MRF regs are released in the send - * instruction once it's sent, not when the result comes - * back. - */ - add_dep(last_mrf_write[inst->base_mrf + i], n); + if (inst->base_mrf != -1) { + for (int i = 0; i < inst->mlen; i++) { + /* It looks like the MRF regs are released in the send + * instruction once it's sent, not when the result comes + * back. 
+ */ + add_dep(last_mrf_write[inst->base_mrf + i], n); + } } if (inst->reads_flag()) { @@ -1116,7 +1118,7 @@ vec4_instruction_scheduler::calculate_deps() add_barrier_deps(n); } - if (inst->mlen > 0) { + if (inst->mlen > 0 && inst->base_mrf != -1) { for (int i = 0; i < v->implied_mrf_writes(inst); i++) { add_dep(last_mrf_write[inst->base_mrf + i], n); last_mrf_write[inst->base_mrf + i] = n; @@ -1171,12 +1173,14 @@ vec4_instruction_scheduler::calculate_deps() } } - for (int i = 0; i < inst->mlen; i++) { - /* It looks like the MRF regs are released in the send - * instruction once it's sent, not when the result comes - * back. - */ - add_dep(n, last_mrf_write[inst->base_mrf + i], 2); + if (inst->base_mrf != -1) { + for (int i = 0; i < inst->mlen; i++) { + /* It looks like the MRF regs are released in the send + * instruction once it's sent, not when the result comes + * back. + */ + add_dep(n, last_mrf_write[inst->base_mrf + i], 2); + } } if (inst->reads_flag()) { @@ -1203,7 +1207,7 @@ vec4_instruction_scheduler::calculate_deps() add_barrier_deps(n); } - if (inst->mlen > 0) { + if (inst->mlen > 0 && inst->base_mrf != -1) { for (int i = 0; i < v->implied_mrf_writes(inst); i++) { last_mrf_write[inst->base_mrf + i] = n; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 05f6fe78523..18cee8722af 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -544,23 +544,6 @@ brw_instruction_name(enum opcode op) } } -backend_visitor::backend_visitor(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - gl_shader_stage stage) - : brw(brw), - ctx(&brw->ctx), - shader(shader_prog ? - (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL), - shader_prog(shader_prog), - prog(prog), - stage_prog_data(stage_prog_data), - cfg(NULL), - stage(stage) -{ -} - bool backend_reg::is_zero() const { @@ -829,104 +812,3 @@ backend_instruction::remove(bblock_t *block) exec_node::remove(); } - -void -backend_visitor::dump_instructions() -{ - dump_instructions(NULL); -} - -void -backend_visitor::dump_instructions(const char *name) -{ - FILE *file = stderr; - if (name && geteuid() != 0) { - file = fopen(name, "w"); - if (!file) - file = stderr; - } - - int ip = 0; - foreach_block_and_inst(block, backend_instruction, inst, cfg) { - if (!name) - fprintf(stderr, "%d: ", ip++); - dump_instruction(inst, file); - } - - if (file != stderr) { - fclose(file); - } -} - -void -backend_visitor::calculate_cfg() -{ - if (this->cfg) - return; - cfg = new(mem_ctx) cfg_t(&this->instructions); -} - -void -backend_visitor::invalidate_cfg() -{ - ralloc_free(this->cfg); - this->cfg = NULL; -} - -/** - * Sets up the starting offsets for the groups of binding table entries - * commong to all pipeline stages. - * - * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're - * unused but also make sure that addition of small offsets to them will - * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. 
- */ -void -backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset) -{ - int num_textures = _mesa_fls(prog->SamplersUsed); - - stage_prog_data->binding_table.texture_start = next_binding_table_offset; - next_binding_table_offset += num_textures; - - if (shader) { - stage_prog_data->binding_table.ubo_start = next_binding_table_offset; - next_binding_table_offset += shader->base.NumUniformBlocks; - } else { - stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0; - } - - if (INTEL_DEBUG & DEBUG_SHADER_TIME) { - stage_prog_data->binding_table.shader_time_start = next_binding_table_offset; - next_binding_table_offset++; - } else { - stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0; - } - - if (prog->UsesGather) { - if (brw->gen >= 8) { - stage_prog_data->binding_table.gather_texture_start = - stage_prog_data->binding_table.texture_start; - } else { - stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset; - next_binding_table_offset += num_textures; - } - } else { - stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; - } - - if (shader_prog && shader_prog->NumAtomicBuffers) { - stage_prog_data->binding_table.abo_start = next_binding_table_offset; - next_binding_table_offset += shader_prog->NumAtomicBuffers; - } else { - stage_prog_data->binding_table.abo_start = 0xd0d0d0d0; - } - - /* This may or may not be used depending on how the compile goes. */ - stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset; - next_binding_table_offset++; - - assert(next_binding_table_offset <= BRW_MAX_SURFACES); - - /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */ -} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 0f927acfc4a..0c75b4c3ee2 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -21,18 +21,14 @@ * IN THE SOFTWARE. 
*/ +#pragma once + #include <stdint.h> #include "brw_reg.h" #include "brw_defines.h" #include "main/compiler.h" #include "glsl/ir.h" -#ifdef __cplusplus -#include "brw_ir_allocator.h" -#endif - -#pragma once - enum PACKED register_file { BAD_FILE, GRF, @@ -149,52 +145,6 @@ enum instruction_scheduler_mode { SCHEDULE_POST, }; -class backend_visitor : public ir_visitor { -protected: - - backend_visitor(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - gl_shader_stage stage); - -public: - - struct brw_context * const brw; - struct gl_context * const ctx; - struct brw_shader * const shader; - struct gl_shader_program * const shader_prog; - struct gl_program * const prog; - struct brw_stage_prog_data * const stage_prog_data; - - /** ralloc context for temporary data used during compile */ - void *mem_ctx; - - /** - * List of either fs_inst or vec4_instruction (inheriting from - * backend_instruction) - */ - exec_list instructions; - - cfg_t *cfg; - - gl_shader_stage stage; - - brw::simple_allocator alloc; - - virtual void dump_instruction(backend_instruction *inst) = 0; - virtual void dump_instruction(backend_instruction *inst, FILE *file) = 0; - virtual void dump_instructions(); - virtual void dump_instructions(const char *name); - - void calculate_cfg(); - void invalidate_cfg(); - - void assign_common_binding_table_offsets(uint32_t next_binding_table_offset); - - virtual void invalidate_live_intervals() = 0; -}; - uint32_t brw_texture_offset(struct gl_context *ctx, ir_constant *offset); #endif /* __cplusplus */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 51a2390e764..49c762aec45 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -169,6 +169,21 @@ dst_reg::dst_reg(const src_reg ®) } bool +dst_reg::equals(const dst_reg &r) const +{ + return (file == r.file && + reg == r.reg && + reg_offset == r.reg_offset && + type == r.type && + negate == r.negate && + abs == r.abs && + writemask == r.writemask && + !reladdr && !r.reladdr && + memcmp(&fixed_hw_reg, &r.fixed_hw_reg, + sizeof(fixed_hw_reg)) == 0); +} + +bool vec4_instruction::is_send_from_grf() { switch (opcode) { @@ -195,66 +210,6 @@ vec4_instruction::can_do_source_mods(struct brw_context *brw) return true; } -/** - * Returns how many MRFs an opcode will write over. - * - * Note that this is not the 0 or 1 implied writes in an actual gen - * instruction -- the generate_* functions generate additional MOVs - * for setup. 
- */ -int -vec4_visitor::implied_mrf_writes(vec4_instruction *inst) -{ - if (inst->mlen == 0) - return 0; - - switch (inst->opcode) { - case SHADER_OPCODE_RCP: - case SHADER_OPCODE_RSQ: - case SHADER_OPCODE_SQRT: - case SHADER_OPCODE_EXP2: - case SHADER_OPCODE_LOG2: - case SHADER_OPCODE_SIN: - case SHADER_OPCODE_COS: - return 1; - case SHADER_OPCODE_INT_QUOTIENT: - case SHADER_OPCODE_INT_REMAINDER: - case SHADER_OPCODE_POW: - return 2; - case VS_OPCODE_URB_WRITE: - return 1; - case VS_OPCODE_PULL_CONSTANT_LOAD: - return 2; - case SHADER_OPCODE_GEN4_SCRATCH_READ: - return 2; - case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - return 3; - case GS_OPCODE_URB_WRITE: - case GS_OPCODE_URB_WRITE_ALLOCATE: - case GS_OPCODE_THREAD_END: - return 0; - case GS_OPCODE_FF_SYNC: - return 1; - case SHADER_OPCODE_SHADER_TIME_ADD: - return 0; - case SHADER_OPCODE_TEX: - case SHADER_OPCODE_TXL: - case SHADER_OPCODE_TXD: - case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_CMS: - case SHADER_OPCODE_TXF_MCS: - case SHADER_OPCODE_TXS: - case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: - return inst->header_present ? 1 : 0; - case SHADER_OPCODE_UNTYPED_ATOMIC: - case SHADER_OPCODE_UNTYPED_SURFACE_READ: - return 0; - default: - unreachable("not reached"); - } -} - bool src_reg::equals(const src_reg &r) const { @@ -545,7 +500,7 @@ vec4_visitor::split_uniform_registers() } /* Update that everything is now vector-sized. */ - for (int i = 0; i < this->uniforms; i++) { + for (unsigned i = 0; i < this->uniforms; i++) { this->uniform_size[i] = 1; } } @@ -574,12 +529,12 @@ vec4_visitor::pack_uniform_registers() } } - int new_uniform_count = 0; + unsigned new_uniform_count = 0; /* Now, figure out a packing of the live uniform vectors into our * push constants. */ - for (int src = 0; src < uniforms; src++) { + for (unsigned src = 0; src < uniforms; src++) { assert(src < uniform_array_size); int size = this->uniform_vector_size[src]; @@ -588,7 +543,7 @@ vec4_visitor::pack_uniform_registers() continue; } - int dst; + unsigned dst; /* Find the lowest place we can slot this uniform in. */ for (dst = 0; dst < src; dst++) { if (this->uniform_vector_size[dst] + size <= 4) @@ -725,7 +680,7 @@ vec4_visitor::move_push_constants_to_pull_constants() * If changing this value, note the limitation about total_regs in * brw_curbe.c. */ - int max_uniform_components = 32 * 8; + unsigned max_uniform_components = 32 * 8; if (this->uniforms * 4 <= max_uniform_components) return; @@ -734,7 +689,7 @@ vec4_visitor::move_push_constants_to_pull_constants() * look for the most infrequently used uniform vec4s, but leave * that for later. 
*/ - for (int i = 0; i < this->uniforms * 4; i += 4) { + for (unsigned i = 0; i < this->uniforms * 4; i += 4) { pull_constant_loc[i / 4] = -1; if (i >= max_uniform_components) { @@ -778,12 +733,13 @@ vec4_visitor::move_push_constants_to_pull_constants() pull_constant_loc[inst->src[i].reg] == -1) continue; - int uniform = inst->src[i].reg; - - dst_reg temp = dst_reg(this, glsl_type::vec4_type); + vec4_builder ibld = bld.at(block, inst); + int loc = pull_constant_loc[inst->src[i].reg] + inst->src[i].reg_offset; + src_reg surf_index(prog_data->base.binding_table.pull_constants_start); + dst_reg temp = ibld.vector().natural_reg(BRW_REGISTER_TYPE_F); - emit_pull_constant_load(block, inst, temp, inst->src[i], - pull_constant_loc[uniform]); + emit_pull_constant_load(ibld, temp, surf_index, 16 * loc, + inst->src[i].reladdr, 4); inst->src[i].file = temp.file; inst->src[i].reg = temp.reg; @@ -1578,97 +1534,6 @@ vec4_visitor::assign_binding_table_offsets() assign_common_binding_table_offsets(0); } -src_reg -vec4_visitor::get_timestamp() -{ - assert(brw->gen >= 7); - - src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_TIMESTAMP, - 0, - BRW_REGISTER_TYPE_UD, - BRW_VERTICAL_STRIDE_0, - BRW_WIDTH_4, - BRW_HORIZONTAL_STRIDE_4, - BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW)); - - dst_reg dst = dst_reg(this, glsl_type::uvec4_type); - - vec4_instruction *mov = emit(MOV(dst, ts)); - /* We want to read the 3 fields we care about (mostly field 0, but also 2) - * even if it's not enabled in the dispatch. - */ - mov->force_writemask_all = true; - - return src_reg(dst); -} - -void -vec4_visitor::emit_shader_time_begin() -{ - current_annotation = "shader time start"; - shader_start_time = get_timestamp(); -} - -void -vec4_visitor::emit_shader_time_end() -{ - current_annotation = "shader time end"; - src_reg shader_end_time = get_timestamp(); - - - /* Check that there weren't any timestamp reset events (assuming these - * were the only two timestamp reads that happened). - */ - src_reg reset_end = shader_end_time; - reset_end.swizzle = BRW_SWIZZLE_ZZZZ; - vec4_instruction *test = emit(AND(dst_null_d(), reset_end, src_reg(1u))); - test->conditional_mod = BRW_CONDITIONAL_Z; - - emit(IF(BRW_PREDICATE_NORMAL)); - - /* Take the current timestamp and get the delta. */ - shader_start_time.negate = true; - dst_reg diff = dst_reg(this, glsl_type::uint_type); - emit(ADD(diff, shader_start_time, shader_end_time)); - - /* If there were no instructions between the two timestamp gets, the diff - * is 2 cycles. Remove that overhead, so I can forget about that when - * trying to determine the time taken for single instructions. 
- */ - emit(ADD(diff, src_reg(diff), src_reg(-2u))); - - emit_shader_time_write(st_base, src_reg(diff)); - emit_shader_time_write(st_written, src_reg(1u)); - emit(BRW_OPCODE_ELSE); - emit_shader_time_write(st_reset, src_reg(1u)); - emit(BRW_OPCODE_ENDIF); -} - -void -vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, - src_reg value) -{ - int shader_time_index = - brw_get_shader_time_index(brw, shader_prog, prog, type); - - dst_reg dst = - dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2)); - - dst_reg offset = dst; - dst_reg time = dst; - time.reg_offset++; - - offset.type = BRW_REGISTER_TYPE_UD; - emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE))); - - time.type = BRW_REGISTER_TYPE_UD; - emit(MOV(time, src_reg(value))); - - emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst)); -} - bool vec4_visitor::run() { @@ -1689,13 +1554,13 @@ vec4_visitor::run() } else { emit_program_code(); } - base_ir = NULL; if (key->userclip_active && !prog->UsesClipDistanceOut) setup_uniform_clipplane_values(); emit_thread_end(); + bld = bld.at(NULL, NULL); calculate_cfg(); /* Before any optimization, push array accesses out to scratch @@ -1731,7 +1596,7 @@ vec4_visitor::run() snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \ stage_name, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ \ - backend_visitor::dump_instructions(filename); \ + base_visitor::dump_instructions(filename); \ } \ \ progress = progress || this_progress; \ @@ -1743,7 +1608,7 @@ vec4_visitor::run() snprintf(filename, 64, "%s-%04d-00-start", stage_name, shader_prog ? shader_prog->Name : 0); - backend_visitor::dump_instructions(filename); + base_visitor::dump_instructions(filename); } bool progress; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 39c65b7b8ed..ab71a5a13ad 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -25,13 +25,10 @@ #define BRW_VEC4_H #include <stdint.h> -#include "brw_shader.h" #include "main/compiler.h" -#include "program/hash_table.h" #include "brw_program.h" - #ifdef __cplusplus -#include "brw_ir_vec4.h" +#include "brw_ir_visitor.h" extern "C" { #endif @@ -93,7 +90,7 @@ namespace brw { * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and * fixed-function) into VS IR. */ -class vec4_visitor : public backend_visitor +class vec4_visitor : public backend_visitor<vec4_visitor, vec4_builder> { public: vec4_visitor(struct brw_context *brw, @@ -106,53 +103,17 @@ public: void *mem_ctx, bool debug_flag, bool no_spills, - shader_time_shader_type st_base, - shader_time_shader_type st_written, - shader_time_shader_type st_reset); - ~vec4_visitor(); - - dst_reg dst_null_f() - { - return dst_reg(brw_null_reg()); - } - - dst_reg dst_null_d() - { - return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - } - - dst_reg dst_null_ud() - { - return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); - } + shader_time_shader_type st_type); struct brw_vec4_compile * const c; const struct brw_vec4_prog_key * const key; struct brw_vec4_prog_data * const prog_data; unsigned int sanity_param_count; - char *fail_msg; - bool failed; - - /** - * GLSL IR currently being processed, which is associated with our - * driver IR instructions for debugging purposes. 
- */ - const void *base_ir; - const char *current_annotation; - - int first_non_payload_grf; - unsigned int max_grf; - int *virtual_grf_start; - int *virtual_grf_end; dst_reg userplane[MAX_CLIP_PLANES]; bool live_intervals_valid; - dst_reg *variable_storage(ir_variable *var); - - void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); - bool need_all_constants_in_pull_buffer; /** @@ -164,48 +125,29 @@ public: */ /*@{*/ virtual void visit(ir_variable *); - virtual void visit(ir_loop *); - virtual void visit(ir_loop_jump *); - virtual void visit(ir_function_signature *); - virtual void visit(ir_function *); - virtual void visit(ir_expression *); - virtual void visit(ir_swizzle *); - virtual void visit(ir_dereference_variable *); - virtual void visit(ir_dereference_array *); - virtual void visit(ir_dereference_record *); - virtual void visit(ir_assignment *); - virtual void visit(ir_constant *); - virtual void visit(ir_call *); - virtual void visit(ir_return *); virtual void visit(ir_discard *); - virtual void visit(ir_texture *); - virtual void visit(ir_if *); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); /*@}*/ - src_reg result; + dst_reg + temporary_reg(const glsl_type *type) + { + const unsigned n = (type->is_array() || type->is_record() ? + 4 : type->vector_elements); + return resize(bld.natural_reg(brw_type_for_base_type(type), + type_size(type)), n); + } /* Regs for vertex results. Generated at ir_variable visiting time * for the ir->location's used. */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; - int *uniform_size; - int *uniform_vector_size; - int uniform_array_size; /*< Size of uniform_[vector_]size arrays */ - int uniforms; - - src_reg shader_start_time; - - struct hash_table *variable_ht; bool run(void); - void fail(const char *msg, ...); void setup_uniform_clipplane_values(); - void setup_uniform_values(ir_variable *ir); - void setup_builtin_uniform_values(ir_variable *ir); int setup_uniforms(int payload_reg); bool reg_allocate_trivial(); bool reg_allocate(); @@ -231,120 +173,21 @@ public: void opt_set_dependency_control(); void opt_schedule_instructions(); - vec4_instruction *emit(vec4_instruction *inst); - - vec4_instruction *emit(enum opcode opcode); - - vec4_instruction *emit(enum opcode opcode, dst_reg dst); - - vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); - - vec4_instruction *emit(enum opcode opcode, dst_reg dst, - src_reg src0, src_reg src1); - - vec4_instruction *emit(enum opcode opcode, dst_reg dst, - src_reg src0, src_reg src1, src_reg src2); - - vec4_instruction *emit_before(bblock_t *block, - vec4_instruction *inst, - vec4_instruction *new_inst); - - vec4_instruction *MOV(const dst_reg &dst, const src_reg &src0); - vec4_instruction *NOT(const dst_reg &dst, const src_reg &src0); - vec4_instruction *RNDD(const dst_reg &dst, const src_reg &src0); - vec4_instruction *RNDE(const dst_reg &dst, const src_reg &src0); - vec4_instruction *RNDZ(const dst_reg &dst, const src_reg &src0); - vec4_instruction *FRC(const dst_reg &dst, const src_reg &src0); - vec4_instruction *F32TO16(const dst_reg &dst, const src_reg &src0); - vec4_instruction *F16TO32(const dst_reg &dst, const src_reg &src0); - vec4_instruction *ADD(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *MUL(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *MACH(const dst_reg &dst, const src_reg &src0, - const src_reg 
&src1); - vec4_instruction *MAC(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *AND(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *OR(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *XOR(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *DP3(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *DP4(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *DPH(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *SHL(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *SHR(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *ASR(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1, - enum brw_conditional_mod condition); - vec4_instruction *IF(src_reg src0, src_reg src1, - enum brw_conditional_mod condition); - vec4_instruction *IF(enum brw_predicate predicate); - vec4_instruction *PULL_CONSTANT_LOAD(const dst_reg &dst, - const src_reg &index); - vec4_instruction *SCRATCH_READ(const dst_reg &dst, const src_reg &index); - vec4_instruction *SCRATCH_WRITE(const dst_reg &dst, const src_reg &src, - const src_reg &index); - vec4_instruction *LRP(const dst_reg &dst, const src_reg &a, - const src_reg &y, const src_reg &x); - vec4_instruction *BFREV(const dst_reg &dst, const src_reg &value); - vec4_instruction *BFE(const dst_reg &dst, const src_reg &bits, - const src_reg &offset, const src_reg &value); - vec4_instruction *BFI1(const dst_reg &dst, const src_reg &bits, - const src_reg &offset); - vec4_instruction *BFI2(const dst_reg &dst, const src_reg &bfi1_dst, - const src_reg &insert, const src_reg &base); - vec4_instruction *FBH(const dst_reg &dst, const src_reg &value); - vec4_instruction *FBL(const dst_reg &dst, const src_reg &value); - vec4_instruction *CBIT(const dst_reg &dst, const src_reg &value); - vec4_instruction *MAD(const dst_reg &dst, const src_reg &c, - const src_reg &b, const src_reg &a); - vec4_instruction *ADDC(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - vec4_instruction *SUBB(const dst_reg &dst, const src_reg &src0, - const src_reg &src1); - - int implied_mrf_writes(vec4_instruction *inst); - - bool try_rewrite_rhs_to_dst(ir_assignment *ir, - dst_reg dst, - src_reg src, - vec4_instruction *pre_rhs_inst, - vec4_instruction *last_rhs_inst); - - /** Walks an exec_list of ir_instruction and sends it through this visitor. 
*/ - void visit_instructions(const exec_list *list); + instruction *SCRATCH_READ(vec4_builder &bld, const dst_reg &dst, + const src_reg &index); + instruction *SCRATCH_WRITE(vec4_builder &bld, const dst_reg &dst, + const src_reg &src, const src_reg &index); + + void emit_pull_constant_load(vec4_builder &bld, + const dst_reg &dst, + const src_reg &surf_index, + uint32_t off, + const src_reg *reladdr, + unsigned num_components); void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst, src_reg src0, src_reg src1, src_reg one); - void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate); - void emit_if_gen6(ir_if *ir); - - void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, - src_reg src0, src_reg src1); - - void emit_lrp(const dst_reg &dst, - const src_reg &x, const src_reg &y, const src_reg &a); - - void emit_block_move(dst_reg *dst, src_reg *src, - const struct glsl_type *type, brw_predicate predicate); - - void emit_constant_values(dst_reg *dst, ir_constant *value); - - /** - * Emit the correct dot-product instruction for the type of arguments - */ - void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); - void emit_scalar(ir_instruction *ir, enum prog_opcode op, dst_reg dst, src_reg src0); @@ -354,69 +197,53 @@ public: void emit_scs(ir_instruction *ir, enum prog_opcode op, dst_reg dst, const src_reg &src); - src_reg fix_3src_operand(src_reg src); - - void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src); - void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src); - void emit_math(enum opcode opcode, dst_reg dst, src_reg src); - void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); - void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); - void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); - src_reg fix_math_operand(src_reg src); - void emit_pack_half_2x16(dst_reg dst, src_reg src0); void emit_unpack_half_2x16(dst_reg dst, src_reg src0); - uint32_t gather_channel(ir_texture *ir, uint32_t sampler); - src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler); - void emit_gen6_gather_wa(uint8_t wa, dst_reg dst); - void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler); - void emit_ndc_computation(); void emit_psiz_and_flags(dst_reg reg); void emit_clip_distances(dst_reg reg, int offset); void emit_generic_urb_slot(dst_reg reg, int varying); void emit_urb_slot(dst_reg reg, int varying); - void emit_shader_time_begin(); - void emit_shader_time_end(); - void emit_shader_time_write(enum shader_time_shader_type type, - src_reg value); - - void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, - dst_reg dst, src_reg offset, src_reg src0, - src_reg src1); - - void emit_untyped_surface_read(unsigned surf_index, dst_reg dst, - src_reg offset); - src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst, src_reg *reladdr, int reg_offset); - src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst, - src_reg *reladdr, int reg_offset); void emit_scratch_read(bblock_t *block, vec4_instruction *inst, dst_reg dst, src_reg orig_src, int base_offset); void emit_scratch_write(bblock_t *block, vec4_instruction *inst, int base_offset); - void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, - dst_reg dst, - src_reg orig_src, - int base_offset); - bool try_emit_mad(ir_expression *ir); - bool try_emit_b2f_of_compare(ir_expression *ir); - void 
resolve_ud_negate(src_reg *reg); + vec4_instruction *emit_texture(ir_texture *ir, const dst_reg &dst, + const src_reg &coordinate, + const src_reg &shadow_c, + const src_reg &lod, const src_reg &lod2, + const src_reg &offset_val, + const src_reg &sample_index, + const src_reg &mcs, const src_reg &sampler); - src_reg get_timestamp(); + src_reg emit_untyped_surface_header(); bool process_move_condition(ir_rvalue *ir); void dump_instruction(backend_instruction *inst); void dump_instruction(backend_instruction *inst, FILE *file); - void visit_atomic_counter_intrinsic(ir_call *ir); + void try_replace_with_sel() {} + + bool + emit_interpolate_expression(ir_expression *ir) + { + unreachable("not reached"); + } + + const struct brw_sampler_prog_key_data * + sampler_prog_key() const { + return &key->tex; + } + + void no16(const char *msg, ...) {} protected: void emit_vertex(); @@ -432,7 +259,6 @@ protected: virtual void emit_thread_end() = 0; virtual void emit_urb_write_header(int mrf) = 0; virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0; - virtual int compute_array_stride(ir_dereference_array *ir); const bool debug_flag; @@ -441,10 +267,6 @@ private: * If true, then register allocation should fail instead of spilling. */ const bool no_spills; - - const shader_time_shader_type st_base; - const shader_time_shader_type st_written; - const shader_time_shader_type st_reset; }; @@ -537,11 +359,13 @@ private: void generate_untyped_atomic(vec4_instruction *inst, struct brw_reg dst, + struct brw_reg payload, struct brw_reg atomic_op, struct brw_reg surf_index); void generate_untyped_surface_read(vec4_instruction *inst, struct brw_reg dst, + struct brw_reg payload, struct brw_reg surf_index); struct brw_context *brw; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index b0a5c0a65e9..3ccac54e436 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -163,18 +163,17 @@ vec4_visitor::opt_cse_local(bblock_t *block) entry->tmp = src_reg(this, glsl_type::float_type); entry->tmp.type = inst->dst.type; entry->tmp.swizzle = BRW_SWIZZLE_XYZW; - - vec4_instruction *copy = MOV(entry->generator->dst, entry->tmp); - entry->generator->insert_after(block, copy); + bld.at(block, (vec4_instruction *)entry->generator->next) + .MOV(entry->generator->dst, entry->tmp); entry->generator->dst = dst_reg(entry->tmp); } /* dest <- temp */ if (!inst->dst.is_null()) { assert(inst->dst.type == entry->tmp.type); - vec4_instruction *copy = MOV(inst->dst, entry->tmp); + vec4_instruction *copy = + bld.at(block, inst).MOV(inst->dst, entry->tmp); copy->force_writemask_all = inst->force_writemask_all; - inst->insert_before(block, copy); } /* Set our iterator so that next time through the loop inst->next diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index e5225673812..308a2114212 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1098,6 +1098,7 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, void vec4_generator::generate_untyped_atomic(vec4_instruction *inst, struct brw_reg dst, + struct brw_reg payload, struct brw_reg atomic_op, struct brw_reg surf_index) { @@ -1106,8 +1107,7 @@ vec4_generator::generate_untyped_atomic(vec4_instruction *inst, surf_index.file == BRW_IMMEDIATE_VALUE && surf_index.type == BRW_REGISTER_TYPE_UD); - brw_untyped_atomic(p, dst, 
brw_message_reg(inst->base_mrf), - atomic_op.dw1.ud, surf_index.dw1.ud, + brw_untyped_atomic(p, dst, payload, atomic_op.dw1.ud, surf_index.dw1.ud, inst->mlen, 1); brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); @@ -1116,13 +1116,13 @@ vec4_generator::generate_untyped_atomic(vec4_instruction *inst, void vec4_generator::generate_untyped_surface_read(vec4_instruction *inst, struct brw_reg dst, + struct brw_reg payload, struct brw_reg surf_index) { assert(surf_index.file == BRW_IMMEDIATE_VALUE && surf_index.type == BRW_REGISTER_TYPE_UD); - brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf), - surf_index.dw1.ud, + brw_untyped_surface_read(p, dst, payload, surf_index.dw1.ud, inst->mlen, 1); brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); @@ -1479,11 +1479,11 @@ vec4_generator::generate_code(const cfg_t *cfg) break; case SHADER_OPCODE_UNTYPED_ATOMIC: - generate_untyped_atomic(inst, dst, src[0], src[1]); + generate_untyped_atomic(inst, dst, src[0], src[1], src[2]); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: - generate_untyped_surface_read(inst, dst, src[0]); + generate_untyped_surface_read(inst, dst, src[0], src[1]); break; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index c569e0aa4ca..ce3ed7f65b3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -41,8 +41,7 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw, bool no_spills) : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base, &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx, - INTEL_DEBUG & DEBUG_GS, no_spills, - ST_GS, ST_GS_WRITTEN, ST_GS_RESET), + INTEL_DEBUG & DEBUG_GS, no_spills, ST_GS), c(c) { } @@ -55,8 +54,8 @@ vec4_gs_visitor::make_reg_for_system_value(ir_variable *ir) switch (ir->data.location) { case SYSTEM_VALUE_INVOCATION_ID: - this->current_annotation = "initialize gl_InvocationID"; - emit(GS_OPCODE_GET_INSTANCE_ID, *reg); + bld.set_annotation("initialize gl_InvocationID"); + bld.emit(GS_OPCODE_GET_INSTANCE_ID, *reg); break; default: unreachable("not reached"); @@ -148,17 +147,17 @@ vec4_gs_visitor::emit_prolog() * reads/writes to garbage memory). So just set it to zero at the top of * the shader. */ - this->current_annotation = "clear r0.2"; + bld.set_annotation("clear r0.2"); dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD)); - vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, 0u); + vec4_instruction *inst = bld.emit(GS_OPCODE_SET_DWORD_2, r0, 0u); inst->force_writemask_all = true; /* Create a virtual register to hold the vertex count */ this->vertex_count = src_reg(this, glsl_type::uint_type); /* Initialize the vertex_count register to 0 */ - this->current_annotation = "initialize vertex_count"; - inst = emit(MOV(dst_reg(this->vertex_count), 0u)); + bld.set_annotation("initialize vertex_count"); + inst = bld.MOV(dst_reg(this->vertex_count), 0u); inst->force_writemask_all = true; if (c->control_data_header_size_bits > 0) { @@ -172,8 +171,8 @@ vec4_gs_visitor::emit_prolog() * Otherwise, we need to initialize it to 0 here. 
*/ if (c->control_data_header_size_bits <= 32) { - this->current_annotation = "initialize control data bits"; - inst = emit(MOV(dst_reg(this->control_data_bits), 0u)); + bld.set_annotation("initialize control data bits"); + inst = bld.MOV(dst_reg(this->control_data_bits), 0u); inst->force_writemask_all = true; } } @@ -183,7 +182,7 @@ vec4_gs_visitor::emit_prolog() * component of VARYING_SLOT_PSIZ. */ if (c->gp->program.Base.InputsRead & VARYING_BIT_PSIZ) { - this->current_annotation = "swizzle gl_PointSize input"; + bld.set_annotation("swizzle gl_PointSize input"); for (int vertex = 0; vertex < c->gp->program.VerticesIn; vertex++) { dst_reg dst(ATTR, BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ); @@ -191,7 +190,7 @@ vec4_gs_visitor::emit_prolog() src_reg src(dst); dst.writemask = WRITEMASK_X; src.swizzle = BRW_SWIZZLE_WWWW; - inst = emit(MOV(dst, src)); + inst = bld.MOV(dst, src); /* In dual instanced dispatch mode, dst has a width of 4, so we need * to make sure the MOV happens regardless of which channels are @@ -201,7 +200,7 @@ vec4_gs_visitor::emit_prolog() } } - this->current_annotation = NULL; + bld.set_annotation(NULL); } @@ -222,7 +221,7 @@ vec4_gs_visitor::emit_thread_end() * corresponding to the most recently output vertex still need to be * emitted. */ - current_annotation = "thread end: emit control data bits"; + bld.set_annotation("thread end: emit control data bits"); emit_control_data_bits(); } @@ -231,15 +230,15 @@ vec4_gs_visitor::emit_thread_end() */ int base_mrf = 1; - current_annotation = "thread end"; + bld.set_annotation("thread end"); dst_reg mrf_reg(MRF, base_mrf); src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - vec4_instruction *inst = emit(MOV(mrf_reg, r0)); + vec4_instruction *inst = bld.MOV(mrf_reg, r0); inst->force_writemask_all = true; - emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count); + bld.emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count); if (INTEL_DEBUG & DEBUG_SHADER_TIME) emit_shader_time_end(); - inst = emit(GS_OPCODE_THREAD_END); + inst = bld.emit(GS_OPCODE_THREAD_END); inst->base_mrf = base_mrf; inst->mlen = 1; } @@ -258,10 +257,10 @@ vec4_gs_visitor::emit_urb_write_header(int mrf) */ dst_reg mrf_reg(MRF, mrf); src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - this->current_annotation = "URB write header"; - vec4_instruction *inst = emit(MOV(mrf_reg, r0)); + bld.set_annotation("URB write header"); + vec4_instruction *inst = bld.MOV(mrf_reg, r0); inst->force_writemask_all = true; - emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count, + bld.emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count, (uint32_t) c->prog_data.output_vertex_size_hwords); } @@ -275,7 +274,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete) */ (void) complete; - vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE); + vec4_instruction *inst = bld.emit(GS_OPCODE_URB_WRITE); inst->offset = c->prog_data.control_data_header_size_hwords; /* We need to increment Global Offset by 1 to make room for Broadwell's @@ -288,9 +287,8 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete) return inst; } - -int -vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir) +unsigned +vec4_gs_visitor::get_array_stride(ir_dereference_array *ir) { /* Geometry shader inputs are arrays, but they use an unusual array layout: * instead of all array elements for a given geometry shader input being @@ -303,7 +301,7 @@ vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir) if (deref_var && 
deref_var->var->data.mode == ir_var_shader_in) return BRW_VARYING_SLOT_COUNT; else - return vec4_visitor::compute_array_stride(ir); + return backend_visitor::get_array_stride(ir); } @@ -349,8 +347,8 @@ vec4_gs_visitor::emit_control_data_bits() /* If vertex_count is 0, then no control data bits have been accumulated * yet, so we should do nothing. */ - emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ)); - emit(IF(BRW_PREDICATE_NORMAL)); + bld.CMP(bld.reg_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ); + bld.IF(BRW_PREDICATE_NORMAL); { /* If we are using either channel masks or a per-slot offset, then we * need to figure out which DWORD we are trying to write to, using the @@ -366,11 +364,11 @@ vec4_gs_visitor::emit_control_data_bits() src_reg dword_index(this, glsl_type::uint_type); if (urb_write_flags) { src_reg prev_count(this, glsl_type::uint_type); - emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); + bld.ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu); unsigned log2_bits_per_vertex = _mesa_fls(c->control_data_bits_per_vertex); - emit(SHR(dst_reg(dword_index), prev_count, - (uint32_t) (6 - log2_bits_per_vertex))); + bld.SHR(dst_reg(dword_index), prev_count, + (uint32_t) (6 - log2_bits_per_vertex)); } /* Start building the URB write message. The first MRF gets a copy of @@ -379,7 +377,7 @@ vec4_gs_visitor::emit_control_data_bits() int base_mrf = 1; dst_reg mrf_reg(MRF, base_mrf); src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - vec4_instruction *inst = emit(MOV(mrf_reg, r0)); + vec4_instruction *inst = bld.MOV(mrf_reg, r0); inst->force_writemask_all = true; if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) { @@ -387,8 +385,8 @@ vec4_gs_visitor::emit_control_data_bits() * the appropriate OWORD within the control data header. */ src_reg per_slot_offset(this, glsl_type::uint_type); - emit(SHR(dst_reg(per_slot_offset), dword_index, 2u)); - emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u); + bld.SHR(dst_reg(per_slot_offset), dword_index, 2u); + bld.emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u); } if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) { @@ -400,24 +398,24 @@ vec4_gs_visitor::emit_control_data_bits() * together. */ src_reg channel(this, glsl_type::uint_type); - inst = emit(AND(dst_reg(channel), dword_index, 3u)); + inst = bld.AND(dst_reg(channel), dword_index, 3u); inst->force_writemask_all = true; src_reg one(this, glsl_type::uint_type); - inst = emit(MOV(dst_reg(one), 1u)); + inst = bld.MOV(dst_reg(one), 1u); inst->force_writemask_all = true; src_reg channel_mask(this, glsl_type::uint_type); - inst = emit(SHL(dst_reg(channel_mask), one, channel)); + inst = bld.SHL(dst_reg(channel_mask), one, channel); inst->force_writemask_all = true; - emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask), + bld.emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask), channel_mask); - emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask); + bld.emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask); } /* Store the control data bits in the message payload and send it. 
*/ dst_reg mrf_reg2(MRF, base_mrf + 1); - inst = emit(MOV(mrf_reg2, this->control_data_bits)); + inst = bld.MOV(mrf_reg2, this->control_data_bits); inst->force_writemask_all = true; - inst = emit(GS_OPCODE_URB_WRITE); + inst = bld.emit(GS_OPCODE_URB_WRITE); inst->urb_write_flags = urb_write_flags; /* We need to increment Global Offset by 256-bits to make room for * Broadwell's extra "Vertex Count" payload at the beginning of the @@ -429,7 +427,7 @@ vec4_gs_visitor::emit_control_data_bits() inst->base_mrf = base_mrf; inst->mlen = 2; } - emit(BRW_OPCODE_ENDIF); + bld.emit(BRW_OPCODE_ENDIF); } void @@ -455,11 +453,11 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id) /* reg::sid = stream_id */ src_reg sid(this, glsl_type::uint_type); - emit(MOV(dst_reg(sid), stream_id)); + bld.MOV(dst_reg(sid), stream_id); /* reg:shift_count = 2 * (vertex_count - 1) */ src_reg shift_count(this, glsl_type::uint_type); - emit(SHL(dst_reg(shift_count), this->vertex_count, 1u)); + bld.SHL(dst_reg(shift_count), this->vertex_count, 1u); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this @@ -467,23 +465,23 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id) * stream_id << ((2 * (vertex_count - 1)) % 32). */ src_reg mask(this, glsl_type::uint_type); - emit(SHL(dst_reg(mask), sid, shift_count)); - emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask)); + bld.SHL(dst_reg(mask), sid, shift_count); + bld.OR(dst_reg(this->control_data_bits), this->control_data_bits, mask); } void vec4_gs_visitor::visit(ir_emit_vertex *ir) { - this->current_annotation = "emit vertex: safety check"; + bld.set_annotation("emit vertex: safety check"); /* To ensure that we don't output more vertices than the shader specified * using max_vertices, do the logic inside a conditional of the form "if * (vertex_count < MAX)" */ unsigned num_output_vertices = c->gp->program.VerticesOut; - emit(CMP(dst_null_d(), this->vertex_count, - src_reg(num_output_vertices), BRW_CONDITIONAL_L)); - emit(IF(BRW_PREDICATE_NORMAL)); + bld.CMP(bld.reg_null_d(), this->vertex_count, + src_reg(num_output_vertices), BRW_CONDITIONAL_L); + bld.IF(BRW_PREDICATE_NORMAL); { /* If we're outputting 32 control data bits or less, then we can wait * until the shader is over to output them all. Otherwise we need to @@ -493,7 +491,7 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) * correct. */ if (c->control_data_header_size_bits > 32) { - this->current_annotation = "emit vertex: emit control data bits"; + bld.set_annotation("emit vertex: emit control data bits"); /* Only emit control data bits if we've finished accumulating a batch * of 32 bits. This is the case when: * @@ -513,10 +511,10 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) * vertex_count & (32 / bits_per_vertex - 1) == 0 */ vec4_instruction *inst = - emit(AND(dst_null_d(), this->vertex_count, - (uint32_t) (32 / c->control_data_bits_per_vertex - 1))); + bld.AND(bld.reg_null_d(), this->vertex_count, + (uint32_t) (32 / c->control_data_bits_per_vertex - 1)); inst->conditional_mod = BRW_CONDITIONAL_Z; - emit(IF(BRW_PREDICATE_NORMAL)); + bld.IF(BRW_PREDICATE_NORMAL); { emit_control_data_bits(); @@ -527,13 +525,13 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) * effect of any call to EndPrimitive() that the shader may have * made before outputting its first vertex. 
*/ - inst = emit(MOV(dst_reg(this->control_data_bits), 0u)); + inst = bld.MOV(dst_reg(this->control_data_bits), 0u); inst->force_writemask_all = true; } - emit(BRW_OPCODE_ENDIF); + bld.emit(BRW_OPCODE_ENDIF); } - this->current_annotation = "emit vertex: vertex data"; + bld.set_annotation("emit vertex: vertex data"); emit_vertex(); /* In stream mode we have to set control data bits for all vertices @@ -543,17 +541,17 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) if (c->control_data_header_size_bits > 0 && c->prog_data.control_data_format == GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) { - this->current_annotation = "emit vertex: Stream control data bits"; + bld.set_annotation("emit vertex: Stream control data bits"); set_stream_control_data_bits(ir->stream_id()); } - this->current_annotation = "emit vertex: increment vertex count"; - emit(ADD(dst_reg(this->vertex_count), this->vertex_count, - src_reg(1u))); + bld.set_annotation("emit vertex: increment vertex count"); + bld.ADD(dst_reg(this->vertex_count), this->vertex_count, + src_reg(1u)); } - emit(BRW_OPCODE_ENDIF); + bld.emit(BRW_OPCODE_ENDIF); - this->current_annotation = NULL; + bld.set_annotation(NULL); } void @@ -594,17 +592,17 @@ vec4_gs_visitor::visit(ir_end_primitive *) /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */ src_reg one(this, glsl_type::uint_type); - emit(MOV(dst_reg(one), 1u)); + bld.MOV(dst_reg(one), 1u); src_reg prev_count(this, glsl_type::uint_type); - emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); + bld.ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu); src_reg mask(this, glsl_type::uint_type); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this * architecture, 1 << (vertex_count - 1) is equivalent to 1 << * ((vertex_count - 1) % 32). 
*/ - emit(SHL(dst_reg(mask), one, prev_count)); - emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask)); + bld.SHL(dst_reg(mask), one, prev_count); + bld.OR(dst_reg(this->control_data_bits), this->control_data_bits, mask); } static const unsigned * diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index 8bf11facb0b..1c8e7ad876c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -91,7 +91,7 @@ protected: virtual void emit_thread_end(); virtual void emit_urb_write_header(int mrf); virtual vec4_instruction *emit_urb_write_opcode(bool complete); - virtual int compute_array_stride(ir_dereference_array *ir); + virtual unsigned get_array_stride(ir_dereference_array *ir); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index de04d881d8b..25a48fd5f4e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -23,7 +23,6 @@ #include "brw_vec4.h" #include "brw_cfg.h" -#include "glsl/ir_uniform.h" extern "C" { #include "program/sampler.h" } @@ -62,198 +61,11 @@ vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst, } vec4_instruction * -vec4_visitor::emit(vec4_instruction *inst) +vec4_visitor::SCRATCH_READ(vec4_builder &bld, const dst_reg &dst, + const src_reg &index) { - inst->ir = this->base_ir; - inst->annotation = this->current_annotation; - - this->instructions.push_tail(inst); - - return inst; -} - -vec4_instruction * -vec4_visitor::emit_before(bblock_t *block, vec4_instruction *inst, - vec4_instruction *new_inst) -{ - new_inst->ir = inst->ir; - new_inst->annotation = inst->annotation; - - inst->insert_before(block, new_inst); - - return inst; -} - -vec4_instruction * -vec4_visitor::emit(enum opcode opcode, dst_reg dst, - src_reg src0, src_reg src1, src_reg src2) -{ - return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1, src2)); -} - - -vec4_instruction * -vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) -{ - return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1)); -} - -vec4_instruction * -vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) -{ - return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0)); -} - -vec4_instruction * -vec4_visitor::emit(enum opcode opcode, dst_reg dst) -{ - return emit(new(mem_ctx) vec4_instruction(opcode, dst)); -} - -vec4_instruction * -vec4_visitor::emit(enum opcode opcode) -{ - return emit(new(mem_ctx) vec4_instruction(opcode, dst_reg())); -} - -#define ALU1(op) \ - vec4_instruction * \ - vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \ - { \ - return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, src0); \ - } - -#define ALU2(op) \ - vec4_instruction * \ - vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \ - const src_reg &src1) \ - { \ - return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \ - src0, src1); \ - } - -#define ALU2_ACC(op) \ - vec4_instruction * \ - vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \ - const src_reg &src1) \ - { \ - vec4_instruction *inst = new(mem_ctx) vec4_instruction( \ - BRW_OPCODE_##op, dst, src0, src1); \ - inst->writes_accumulator = true; \ - return inst; \ - } - -#define ALU3(op) \ - vec4_instruction * \ - vec4_visitor::op(const dst_reg &dst, const src_reg 
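The ir_end_primitive sequence above reduces to setting a single bit in the 32-bit control-data accumulator; as a scalar model (illustration only):

   #include <cstdint>

   static uint32_t end_primitive_model(uint32_t control_data_bits,
                                       uint32_t vertex_count)
   {
      // ADD with 0xffffffffu is the two's-complement "vertex_count - 1";
      // the SHL again honours only the low 5 bits of the shift count,
      // giving the "% 32" noted in the comment above.
      const uint32_t prev_count = vertex_count + 0xffffffffu;
      return control_data_bits | (1u << (prev_count % 32));
   }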
&src0, \ - const src_reg &src1, const src_reg &src2) \ - { \ - assert(brw->gen >= 6); \ - return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \ - src0, src1, src2); \ - } - -ALU1(NOT) -ALU1(MOV) -ALU1(FRC) -ALU1(RNDD) -ALU1(RNDE) -ALU1(RNDZ) -ALU1(F32TO16) -ALU1(F16TO32) -ALU2(ADD) -ALU2(MUL) -ALU2_ACC(MACH) -ALU2(AND) -ALU2(OR) -ALU2(XOR) -ALU2(DP3) -ALU2(DP4) -ALU2(DPH) -ALU2(SHL) -ALU2(SHR) -ALU2(ASR) -ALU3(LRP) -ALU1(BFREV) -ALU3(BFE) -ALU2(BFI1) -ALU3(BFI2) -ALU1(FBH) -ALU1(FBL) -ALU1(CBIT) -ALU3(MAD) -ALU2_ACC(ADDC) -ALU2_ACC(SUBB) -ALU2(MAC) - -/** Gen4 predicated IF. */ -vec4_instruction * -vec4_visitor::IF(enum brw_predicate predicate) -{ - vec4_instruction *inst; - - inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF); - inst->predicate = predicate; - - return inst; -} - -/** Gen6 IF with embedded comparison. */ -vec4_instruction * -vec4_visitor::IF(src_reg src0, src_reg src1, - enum brw_conditional_mod condition) -{ - assert(brw->gen == 6); - - vec4_instruction *inst; - - resolve_ud_negate(&src0); - resolve_ud_negate(&src1); - - inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF, dst_null_d(), - src0, src1); - inst->conditional_mod = condition; - - return inst; -} - -/** - * CMP: Sets the low bit of the destination channels with the result - * of the comparison, while the upper bits are undefined, and updates - * the flag register with the packed 16 bits of the result. - */ -vec4_instruction * -vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, - enum brw_conditional_mod condition) -{ - vec4_instruction *inst; - - /* original gen4 does type conversion to the destination type - * before comparison, producing garbage results for floating - * point comparisons. - */ - if (brw->gen == 4) { - dst.type = src0.type; - if (dst.file == HW_REG) - dst.fixed_hw_reg.type = dst.type; - } - - resolve_ud_negate(&src0); - resolve_ud_negate(&src1); - - inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst, src0, src1); - inst->conditional_mod = condition; - - return inst; -} - -vec4_instruction * -vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index) -{ - vec4_instruction *inst; - - inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ, - dst, index); + instruction *inst = bld.emit(SHADER_OPCODE_GEN4_SCRATCH_READ, + dst, index); inst->base_mrf = 14; inst->mlen = 2; @@ -261,13 +73,11 @@ vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index) } vec4_instruction * -vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src, - const src_reg &index) +vec4_visitor::SCRATCH_WRITE(vec4_builder &bld, const dst_reg &dst, + const src_reg &src, const src_reg &index) { - vec4_instruction *inst; - - inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE, - dst, src, index); + instruction *inst = bld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, + dst, src, index); inst->base_mrf = 13; inst->mlen = 3; @@ -275,167 +85,48 @@ vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src, void -vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) +vec4_visitor::emit_pull_constant_load(vec4_builder &bld, + const dst_reg &dst, + const src_reg &surf_index, + uint32_t off, + const src_reg *reladdr, + unsigned num_components) { - static enum opcode dot_opcodes[] = { - BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 - }; - - emit(dot_opcodes[elements - 2], dst, src0, src1); -} - -src_reg -vec4_visitor::fix_3src_operand(src_reg src) -{ - /* Using vec4 uniforms in SIMD4x2 programs is difficult.
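The emit() helpers deleted above move behind the builder object that SCRATCH_READ and SCRATCH_WRITE now take. The real brw::vec4_builder interface is only used, never defined, in this diff, so the class below is merely a sketch of the pattern under that assumption: the builder owns the instruction stream and the current annotation, so every instruction it creates is tagged uniformly, and the per-opcode helpers can be shared between the FS and VEC4 back-ends.

   #include <list>

   struct sketch_inst {
      int opcode;
      const char *annotation;
   };

   class sketch_builder {
   public:
      explicit sketch_builder(std::list<sketch_inst *> *insts)
         : insts_(insts), annotation_(nullptr) {}

      void set_annotation(const char *a) { annotation_ = a; }

      // Every ALU helper (MOV, ADD, CMP, ...) bottoms out here, so the
      // annotation bookkeeping lives in exactly one place.
      sketch_inst *emit(int opcode) {
         sketch_inst *inst = new sketch_inst{opcode, annotation_};
         insts_->push_back(inst);
         return inst;
      }

   private:
      std::list<sketch_inst *> *insts_;
      const char *annotation_;
   };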
You'd like to be - * able to use vertical stride of zero to replicate the vec4 uniform, like - * - * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7] - * - * But you can't, since vertical stride is always four in three-source - * instructions. Instead, insert a MOV instruction to do the replication so - * that the three-source instruction can consume it. - */ - - /* The MOV is only needed if the source is a uniform or immediate. */ - if (src.file != UNIFORM && src.file != IMM) - return src; - - if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle)) - return src; - - dst_reg expanded = dst_reg(this, glsl_type::vec4_type); - expanded.type = src.type; - emit(MOV(expanded, src)); - return src_reg(expanded); -} - -src_reg -vec4_visitor::fix_math_operand(src_reg src) -{ - /* The gen6 math instruction ignores the source modifiers -- - * swizzle, abs, negate, and at least some parts of the register - * region description. - * - * Rather than trying to enumerate all these cases, *always* expand the - * operand to a temp GRF for gen6. - * - * For gen7, keep the operand as-is, except if immediate, which gen7 still - * can't use. + /* Pre-gen6, the message header uses byte offsets instead of vec4 + * (16-byte) offset units. */ + const unsigned scale = (brw->gen >= 6 ? 16 : 1); + src_reg result(bld.vector(num_components).natural_reg(dst.type)); + src_reg addr; - if (brw->gen == 7 && src.file != IMM) - return src; - - dst_reg expanded = dst_reg(this, glsl_type::vec4_type); - expanded.type = src.type; - emit(MOV(expanded, src)); - return src_reg(expanded); -} - -void -vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) -{ - src = fix_math_operand(src); - - if (brw->gen == 6 && dst.writemask != WRITEMASK_XYZW) { - /* The gen6 math instruction must be align1, so we can't do - * writemasks. - */ - dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); - - emit(opcode, temp_dst, src); - - emit(MOV(dst, src_reg(temp_dst))); + if (reladdr) { + addr = src_reg(bld.scalar_reg(reladdr->type)); + bld.ADD(dst_reg(addr), *reladdr, src_reg(off / 16)); + if (scale == 1) + bld.SHL(dst_reg(addr), addr, src_reg(4)); } else { - emit(opcode, dst, src); + addr = src_reg((off & ~0xf) / scale); } -} -void -vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) -{ - vec4_instruction *inst = emit(opcode, dst, src); - inst->base_mrf = 1; - inst->mlen = 1; -} - -void -vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) -{ - switch (opcode) { - case SHADER_OPCODE_RCP: - case SHADER_OPCODE_RSQ: - case SHADER_OPCODE_SQRT: - case SHADER_OPCODE_EXP2: - case SHADER_OPCODE_LOG2: - case SHADER_OPCODE_SIN: - case SHADER_OPCODE_COS: - break; - default: - unreachable("not reached: bad math opcode"); - } - - if (brw->gen >= 8) { - emit(opcode, dst, src); - } else if (brw->gen >= 6) { - emit_math1_gen6(opcode, dst, src); - } else { - emit_math1_gen4(opcode, dst, src); - } -} - -void -vec4_visitor::emit_math2_gen6(enum opcode opcode, - dst_reg dst, src_reg src0, src_reg src1) -{ - src0 = fix_math_operand(src0); - src1 = fix_math_operand(src1); - - if (brw->gen == 6 && dst.writemask != WRITEMASK_XYZW) { - /* The gen6 math instruction must be align1, so we can't do - * writemasks. 
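The offset juggling in the new emit_pull_constant_load() above is a unit conversion; a scalar model of the constant-offset path (illustration only):

   #include <cstdint>

   static uint32_t pull_constant_msg_offset(int gen, uint32_t off_bytes)
   {
      // gen6+ messages take the offset in vec4 (16-byte) units, earlier
      // parts take it in bytes, hence the scale of 16 vs. 1 above.
      const uint32_t scale = (gen >= 6 ? 16 : 1);
      return (off_bytes & ~0xfu) / scale;   // align to the vec4, then scale
   }

   // The reladdr path computes the same value at run time: the relative
   // index is added to off / 16 in vec4 units, then shifted left by 4
   // when the message wants bytes (the scale == 1 case).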
- */ - dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); - temp_dst.type = dst.type; - - emit(opcode, temp_dst, src0, src1); + if (brw->gen >= 7) { + if (addr.file == IMM) { + dst_reg tmp = bld.scalar_reg(addr.type); + bld.MOV(tmp, addr); + addr = src_reg(tmp); + } - emit(MOV(dst, src_reg(temp_dst))); + bld.emit(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, dst_reg(result), + surf_index, addr); } else { - emit(opcode, dst, src0, src1); - } -} - -void -vec4_visitor::emit_math2_gen4(enum opcode opcode, - dst_reg dst, src_reg src0, src_reg src1) -{ - vec4_instruction *inst = emit(opcode, dst, src0, src1); - inst->base_mrf = 1; - inst->mlen = 2; -} - -void -vec4_visitor::emit_math(enum opcode opcode, - dst_reg dst, src_reg src0, src_reg src1) -{ - switch (opcode) { - case SHADER_OPCODE_POW: - case SHADER_OPCODE_INT_QUOTIENT: - case SHADER_OPCODE_INT_REMAINDER: - break; - default: - unreachable("not reached: unsupported binary math opcode"); + vec4_instruction *pull = bld.emit(VS_OPCODE_PULL_CONSTANT_LOAD, + dst_reg(result), surf_index, addr); + pull->base_mrf = 14; + pull->mlen = 1; } - if (brw->gen >= 8) { - emit(opcode, dst, src0, src1); - } else if (brw->gen >= 6) { - emit_math2_gen6(opcode, dst, src0, src1); - } else { - emit_math2_gen4(opcode, dst, src0, src1); - } + result.swizzle += BRW_SWIZZLE4(off % 16 / 4, off % 16 / 4, + off % 16 / 4, off % 16 / 4); + bld.MOV(dst, result); } void @@ -486,7 +177,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) * You should inspect the disasm output in order to verify that the MOV is * not optimized away. */ - emit(MOV(tmp_dst, src_reg(0x12345678u))); + bld.MOV(tmp_dst, src_reg(0x12345678u)); #endif /* Give tmp the form below, where "." means untouched. @@ -499,20 +190,20 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) * relies on the undocumented hardware behavior mentioned above. */ tmp_dst.writemask = WRITEMASK_XY; - emit(F32TO16(tmp_dst, src0)); + bld.F32TO16(tmp_dst, src0); /* Give the write-channels of dst the form: * 0xhhhh0000 */ tmp_src.swizzle = BRW_SWIZZLE_YYYY; - emit(SHL(dst, tmp_src, src_reg(16u))); + bld.SHL(dst, tmp_src, src_reg(16u)); /* Finally, give the write-channels of dst the form of packHalf2x16's * output: * 0xhhhhllll */ tmp_src.swizzle = BRW_SWIZZLE_XXXX; - emit(OR(dst, src_reg(dst), tmp_src)); + bld.OR(dst, src_reg(dst), tmp_src); } void @@ -544,70 +235,13 @@ vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0) src_reg tmp_src(tmp_dst); tmp_dst.writemask = WRITEMASK_X; - emit(AND(tmp_dst, src0, src_reg(0xffffu))); + bld.AND(tmp_dst, src0, src_reg(0xffffu)); tmp_dst.writemask = WRITEMASK_Y; - emit(SHR(tmp_dst, src0, src_reg(16u))); + bld.SHR(tmp_dst, src0, src_reg(16u)); dst.writemask = WRITEMASK_XY; - emit(F16TO32(dst, tmp_src)); -} - -void -vec4_visitor::visit_instructions(const exec_list *list) -{ - foreach_in_list(ir_instruction, ir, list) { - base_ir = ir; - ir->accept(this); - } -} - - -static int -type_size(const struct glsl_type *type) -{ - unsigned int i; - int size; - - switch (type->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - if (type->is_matrix()) { - return type->matrix_columns; - } else { - /* Regardless of size of vector, it gets a vec4. This is bad - * packing for things like floats, but otherwise arrays become a - * mess. Hopefully a later pass over the code can pack scalars - * down if appropriate. 
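A reference model of the packing produced by the three instructions above, F32TO16 into .xy, SHL of .y by 16, OR with .x (f32_to_f16() stands in for the hardware conversion and is assumed, not defined by this diff):

   #include <cstdint>

   extern uint16_t f32_to_f16(float f);   // hardware F32TO16 stand-in

   static uint32_t pack_half_2x16_model(float x, float y)
   {
      const uint32_t lo = f32_to_f16(x);   // written to .x, bits 15:0
      const uint32_t hi = f32_to_f16(y);   // taken from .y, SHL'd by 16
      return (hi << 16) | lo;              // ORed together: 0xhhhhllll
   }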
- */ - return 1; - } - case GLSL_TYPE_ARRAY: - assert(type->length > 0); - return type_size(type->fields.array) * type->length; - case GLSL_TYPE_STRUCT: - size = 0; - for (i = 0; i < type->length; i++) { - size += type_size(type->fields.structure[i].type); - } - return size; - case GLSL_TYPE_SAMPLER: - /* Samplers take up no register space, since they're baked in at - * link time. - */ - return 0; - case GLSL_TYPE_ATOMIC_UINT: - return 0; - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: - unreachable("not reached"); - } - - return 0; + bld.F16TO32(dst, tmp_src); } src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) @@ -615,7 +249,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = GRF; - this->reg = v->alloc.allocate(type_size(type)); + this->reg = v->alloc.allocate(v->type_size(type)); if (type->is_array() || type->is_record()) { this->swizzle = BRW_SWIZZLE_NOOP; @@ -633,7 +267,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) init(); this->file = GRF; - this->reg = v->alloc.allocate(type_size(type) * size); + this->reg = v->alloc.allocate(v->type_size(type) * size); this->swizzle = BRW_SWIZZLE_NOOP; @@ -645,7 +279,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = GRF; - this->reg = v->alloc.allocate(type_size(type)); + this->reg = v->alloc.allocate(v->type_size(type)); if (type->is_array() || type->is_record()) { this->writemask = WRITEMASK_XYZW; @@ -656,55 +290,6 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) this->type = brw_type_for_base_type(type); } -/* Our support for uniforms is piggy-backed on the struct - * gl_fragment_program, because that's where the values actually - * get stored, rather than in some global gl_shader_program uniform - * store. - */ -void -vec4_visitor::setup_uniform_values(ir_variable *ir) -{ - int namelen = strlen(ir->name); - - /* The data for our (non-builtin) uniforms is stored in a series of - * gl_uniform_driver_storage structs for each subcomponent that - * glGetUniformLocation() could name. We know it's been set up in the same - * order we'd walk the type, so walk the list of storage and find anything - * with our name, or the prefix of a component that starts with our name. - */ - for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) { - struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; - - if (strncmp(ir->name, storage->name, namelen) != 0 || - (storage->name[namelen] != 0 && - storage->name[namelen] != '.' && - storage->name[namelen] != '[')) { - continue; - } - - gl_constant_value *components = storage->storage; - unsigned vector_count = (MAX2(storage->array_elements, 1) * - storage->type->matrix_columns); - - for (unsigned s = 0; s < vector_count; s++) { - assert(uniforms < uniform_array_size); - uniform_vector_size[uniforms] = storage->type->vector_elements; - - int i; - for (i = 0; i < uniform_vector_size[uniforms]; i++) { - stage_prog_data->param[uniforms * 4 + i] = components; - components++; - } - for (; i < 4; i++) { - static gl_constant_value zero = { 0.0 }; - stage_prog_data->param[uniforms * 4 + i] = &zero; - } - - uniforms++; - } - } -} - void vec4_visitor::setup_uniform_clipplane_values() { @@ -723,270 +308,6 @@ vec4_visitor::setup_uniform_clipplane_values() } } -/* Our support for builtin uniforms is even scarier than non-builtin. 
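The vec4 packing rule implemented by the removed type_size() (now obtained from the shared v->type_size()), modeled on a toy type tree; glsl_type itself is not reproduced here, so the struct is an assumption for illustration:

   #include <vector>

   struct toy_type {
      int columns;                     // matrix_columns; 1 for scalars/vectors
      int array_length;                // 0 when not an array
      std::vector<toy_type> fields;    // struct members, empty otherwise
   };

   static int toy_type_size(const toy_type &t)
   {
      if (!t.fields.empty()) {         // struct: sum of the members
         int sum = 0;
         for (const toy_type &f : t.fields)
            sum += toy_type_size(f);
         return sum;
      }
      int n = t.columns;               // every column gets a full vec4
      if (t.array_length)
         n *= t.array_length;          // arrays multiply the element size
      return n;                        // e.g. mat3 -> 3, float[10] -> 10
   }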
- * It sits on top of the PROG_STATE_VAR parameters that are - * automatically updated from GL context state. - */ -void -vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) -{ - const ir_state_slot *const slots = ir->get_state_slots(); - assert(slots != NULL); - - for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { - /* This state reference has already been setup by ir_to_mesa, - * but we'll get the same index back here. We can reference - * ParameterValues directly, since unlike brw_fs.cpp, we never - * add new state references during compile. - */ - int index = _mesa_add_state_reference(this->prog->Parameters, - (gl_state_index *)slots[i].tokens); - gl_constant_value *values = - &this->prog->Parameters->ParameterValues[index][0]; - - assert(this->uniforms < uniform_array_size); - this->uniform_vector_size[this->uniforms] = 0; - /* Add each of the unique swizzled channels of the element. - * This will end up matching the size of the glsl_type of this field. - */ - int last_swiz = -1; - for (unsigned int j = 0; j < 4; j++) { - int swiz = GET_SWZ(slots[i].swizzle, j); - last_swiz = swiz; - - stage_prog_data->param[this->uniforms * 4 + j] = &values[swiz]; - assert(this->uniforms < uniform_array_size); - if (swiz <= last_swiz) - this->uniform_vector_size[this->uniforms]++; - } - this->uniforms++; - } -} - -dst_reg * -vec4_visitor::variable_storage(ir_variable *var) -{ - return (dst_reg *)hash_table_find(this->variable_ht, var); -} - -void -vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, - enum brw_predicate *predicate) -{ - ir_expression *expr = ir->as_expression(); - - *predicate = BRW_PREDICATE_NORMAL; - - if (expr && expr->operation != ir_binop_ubo_load) { - src_reg op[3]; - vec4_instruction *inst; - - assert(expr->get_num_operands() <= 3); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - expr->operands[i]->accept(this); - op[i] = this->result; - - resolve_ud_negate(&op[i]); - } - - switch (expr->operation) { - case ir_unop_logic_not: - inst = emit(AND(dst_null_d(), op[0], src_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; - - case ir_binop_logic_xor: - inst = emit(XOR(dst_null_d(), op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_logic_or: - inst = emit(OR(dst_null_d(), op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_logic_and: - inst = emit(AND(dst_null_d(), op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_unop_f2b: - if (brw->gen >= 6) { - emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); - } else { - inst = emit(MOV(dst_null_f(), op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_unop_i2b: - if (brw->gen >= 6) { - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - } else { - inst = emit(MOV(dst_null_d(), op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_binop_all_equal: - inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); - *predicate = BRW_PREDICATE_ALIGN16_ALL4H; - break; - - case ir_binop_any_nequal: - inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); - *predicate = BRW_PREDICATE_ALIGN16_ANY4H; - break; - - case ir_unop_any: - inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - *predicate = BRW_PREDICATE_ALIGN16_ANY4H; - break; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_nequal: - 
emit(CMP(dst_null_d(), op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - break; - - case ir_triop_csel: { - /* Expand the boolean condition into the flag register. */ - inst = emit(MOV(dst_null_d(), op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - /* Select which boolean to return. */ - dst_reg temp(this, expr->operands[1]->type); - inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]); - inst->predicate = BRW_PREDICATE_NORMAL; - - /* Expand the result to a condition code. */ - inst = emit(MOV(dst_null_d(), src_reg(temp))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - } - - default: - unreachable("not reached"); - } - return; - } - - ir->accept(this); - - resolve_ud_negate(&this->result); - - if (brw->gen >= 6) { - vec4_instruction *inst = emit(AND(dst_null_d(), - this->result, src_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } else { - vec4_instruction *inst = emit(MOV(dst_null_d(), this->result)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } -} - -/** - * Emit a gen6 IF statement with the comparison folded into the IF - * instruction. - */ -void -vec4_visitor::emit_if_gen6(ir_if *ir) -{ - ir_expression *expr = ir->condition->as_expression(); - - if (expr && expr->operation != ir_binop_ubo_load) { - src_reg op[3]; - dst_reg temp; - - assert(expr->get_num_operands() <= 3); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - expr->operands[i]->accept(this); - op[i] = this->result; - } - - switch (expr->operation) { - case ir_unop_logic_not: - emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z)); - return; - - case ir_binop_logic_xor: - emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_logic_or: - temp = dst_reg(this, glsl_type::bool_type); - emit(OR(temp, op[0], op[1])); - emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_logic_and: - temp = dst_reg(this, glsl_type::bool_type); - emit(AND(temp, op[0], op[1])); - emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_unop_f2b: - emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_unop_i2b: - emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_nequal: - emit(IF(op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - return; - - case ir_binop_all_equal: - emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); - emit(IF(BRW_PREDICATE_ALIGN16_ALL4H)); - return; - - case ir_binop_any_nequal: - emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); - emit(IF(BRW_PREDICATE_ALIGN16_ANY4H)); - return; - - case ir_unop_any: - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - emit(IF(BRW_PREDICATE_ALIGN16_ANY4H)); - return; - - case ir_triop_csel: { - /* Expand the boolean condition into the flag register. */ - vec4_instruction *inst = emit(MOV(dst_null_d(), op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - /* Select which boolean to return. 
*/ - dst_reg temp(this, expr->operands[1]->type); - inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]); - inst->predicate = BRW_PREDICATE_NORMAL; - - emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ)); - return; - } - - default: - unreachable("not reached"); - } - return; - } - - ir->condition->accept(this); - - emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ)); -} - void vec4_visitor::visit(ir_variable *ir) { @@ -995,1803 +316,31 @@ vec4_visitor::visit(ir_variable *ir) if (variable_storage(ir)) return; - switch (ir->data.mode) { - case ir_var_shader_in: - reg = new(mem_ctx) dst_reg(ATTR, ir->data.location); - break; + if (ir->data.mode == ir_var_shader_in) { + reg = new(mem_ctx) dst_reg(resize(dst_reg(ATTR, ir->data.location), + type_vector_size(ir->type))); - case ir_var_shader_out: - reg = new(mem_ctx) dst_reg(this, ir->type); + } else if (ir->data.mode == ir_var_shader_out) { + reg = new(mem_ctx) dst_reg(temporary_reg(ir->type)); for (int i = 0; i < type_size(ir->type); i++) { - output_reg[ir->data.location + i] = *reg; - output_reg[ir->data.location + i].reg_offset = i; - output_reg[ir->data.location + i].type = + output_reg[ir->data.location + i] = *reg; + output_reg[ir->data.location + i].reg_offset = i; + output_reg[ir->data.location + i].type = brw_type_for_base_type(ir->type->get_scalar_type()); - output_reg_annotation[ir->data.location + i] = ir->name; - } - break; - - case ir_var_auto: - case ir_var_temporary: - reg = new(mem_ctx) dst_reg(this, ir->type); - break; - - case ir_var_uniform: - reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); - - /* Thanks to the lower_ubo_reference pass, we will see only - * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO - * variables, so no need for them to be in variable_ht. - * - * Some uniforms, such as samplers and atomic counters, have no actual - * storage, so we should ignore them. - */ - if (ir->is_in_uniform_block() || type_size(ir->type) == 0) - return; - - /* Track how big the whole uniform variable is, in case we need to put a - * copy of its data into pull constants for array access. - */ - assert(this->uniforms < uniform_array_size); - this->uniform_size[this->uniforms] = type_size(ir->type); - - if (!strncmp(ir->name, "gl_", 3)) { - setup_builtin_uniform_values(ir); - } else { - setup_uniform_values(ir); + output_reg_annotation[ir->data.location + i] = ir->name; } - break; - case ir_var_system_value: + } else if (ir->data.mode == ir_var_system_value) { reg = make_reg_for_system_value(ir); - break; - - default: - unreachable("not reached"); - } - - reg->type = brw_type_for_base_type(ir->type); - hash_table_insert(this->variable_ht, reg, ir); -} - -void -vec4_visitor::visit(ir_loop *ir) -{ - /* We don't want debugging output to print the whole body of the - * loop as the annotation. - */ - this->base_ir = NULL; - - emit(BRW_OPCODE_DO); - - visit_instructions(&ir->body_instructions); - - emit(BRW_OPCODE_WHILE); -} - -void -vec4_visitor::visit(ir_loop_jump *ir) -{ - switch (ir->mode) { - case ir_loop_jump::jump_break: - emit(BRW_OPCODE_BREAK); - break; - case ir_loop_jump::jump_continue: - emit(BRW_OPCODE_CONTINUE); - break; - } -} - - -void -vec4_visitor::visit(ir_function_signature *) -{ - unreachable("not reached"); -} - -void -vec4_visitor::visit(ir_function *ir) -{ - /* Ignore function bodies other than main() -- we shouldn't see calls to - * them since they should all be inlined. 
- */ - if (strcmp(ir->name, "main") == 0) { - const ir_function_signature *sig; - exec_list empty; - - sig = ir->matching_signature(NULL, &empty, false); - - assert(sig); - - visit_instructions(&sig->body); - } -} - -bool -vec4_visitor::try_emit_mad(ir_expression *ir) -{ - /* 3-src instructions were introduced in gen6. */ - if (brw->gen < 6) - return false; - - /* MAD can only handle floating-point data. */ - if (ir->type->base_type != GLSL_TYPE_FLOAT) - return false; - - ir_rvalue *nonmul = ir->operands[1]; - ir_expression *mul = ir->operands[0]->as_expression(); - - if (!mul || mul->operation != ir_binop_mul) { - nonmul = ir->operands[0]; - mul = ir->operands[1]->as_expression(); - - if (!mul || mul->operation != ir_binop_mul) - return false; - } - - nonmul->accept(this); - src_reg src0 = fix_3src_operand(this->result); - - mul->operands[0]->accept(this); - src_reg src1 = fix_3src_operand(this->result); - - mul->operands[1]->accept(this); - src_reg src2 = fix_3src_operand(this->result); - - this->result = src_reg(this, ir->type); - emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2); - - return true; -} - -bool -vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir) -{ - /* This optimization relies on CMP setting the destination to 0 when - * false. Early hardware only sets the least significant bit, and - * leaves the other bits undefined. So we can't use it. - */ - if (brw->gen < 6) - return false; - - ir_expression *const cmp = ir->operands[0]->as_expression(); - - if (cmp == NULL) - return false; - - switch (cmp->operation) { - case ir_binop_less: - case ir_binop_greater: - case ir_binop_lequal: - case ir_binop_gequal: - case ir_binop_equal: - case ir_binop_nequal: - break; - - default: - return false; - } - - cmp->operands[0]->accept(this); - const src_reg cmp_src0 = this->result; - - cmp->operands[1]->accept(this); - const src_reg cmp_src1 = this->result; - - this->result = src_reg(this, ir->type); - - emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1, - brw_conditional_for_comparison(cmp->operation))); - - /* If the comparison is false, this->result will just happen to be zero. - */ - vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result), - this->result, src_reg(1.0f)); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = true; - - return true; -} - -void -vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, - src_reg src0, src_reg src1) -{ - vec4_instruction *inst; - - if (brw->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->conditional_mod = conditionalmod; - } else { - emit(CMP(dst, src0, src1, conditionalmod)); - - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->predicate = BRW_PREDICATE_NORMAL; - } -} - -void -vec4_visitor::emit_lrp(const dst_reg &dst, - const src_reg &x, const src_reg &y, const src_reg &a) -{ - if (brw->gen >= 6) { - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. - */ - emit(LRP(dst, - fix_3src_operand(a), fix_3src_operand(y), fix_3src_operand(x))); - } else { - /* Earlier generations don't support three source operations, so we - * need to emit x*(1-a) + y*a. 
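try_emit_b2f_of_compare() above leans on gen6+ CMP writing all-zero bits to the destination when the comparison is false; a scalar model of the value that results (illustration only):

   #include <cstdint>
   #include <cstring>

   static float b2f_compare_model(float a, float b)
   {
      const bool cmp = (a < b);          // the folded comparison
      uint32_t bits = cmp ? 1u : 0u;     // CMP: guaranteed all-zero on false
      float out;
      std::memcpy(&out, &bits, sizeof(out));   // exactly 0.0f when false
      if (cmp)
         out = 1.0f;                     // SEL with predicate_inverse = true
      return out;
   }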
- */ - dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type); - dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type); - dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type); - y_times_a.writemask = dst.writemask; - one_minus_a.writemask = dst.writemask; - x_times_one_minus_a.writemask = dst.writemask; - - emit(MUL(y_times_a, y, a)); - emit(ADD(one_minus_a, negate(a), src_reg(1.0f))); - emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); - emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); - } -} - -void -vec4_visitor::visit(ir_expression *ir) -{ - unsigned int operand; - src_reg op[Elements(ir->operands)]; - src_reg result_src; - dst_reg result_dst; - vec4_instruction *inst; - - if (ir->operation == ir_binop_add) { - if (try_emit_mad(ir)) - return; - } - - if (ir->operation == ir_unop_b2f) { - if (try_emit_b2f_of_compare(ir)) - return; - } - - for (operand = 0; operand < ir->get_num_operands(); operand++) { - this->result.file = BAD_FILE; - ir->operands[operand]->accept(this); - if (this->result.file == BAD_FILE) { - fprintf(stderr, "Failed to get tree for expression operand:\n"); - ir->operands[operand]->fprint(stderr); - exit(1); - } - op[operand] = this->result; - - /* Matrix expression operands should have been broken down to vector - * operations already. - */ - assert(!ir->operands[operand]->type->is_matrix()); - } - - int vector_elements = ir->operands[0]->type->vector_elements; - if (ir->operands[1]) { - vector_elements = MAX2(vector_elements, - ir->operands[1]->type->vector_elements); - } - - this->result.file = BAD_FILE; - - /* Storage for our result. Ideally for an assignment we'd be using - * the actual storage for the result here, instead. - */ - result_src = src_reg(this, ir->type); - /* convenience for the emit functions below. */ - result_dst = dst_reg(result_src); - /* If nothing special happens, this is the result. */ - this->result = result_src; - /* Limit writes to the channels that will be used by result_src later. - * This does limit this temp's use as a temporary for multi-instruction - * sequences. - */ - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - - switch (ir->operation) { - case ir_unop_logic_not: - if (ctx->Const.UniformBooleanTrue != 1) { - emit(NOT(result_dst, op[0])); - } else { - emit(XOR(result_dst, op[0], src_reg(1))); - } - break; - case ir_unop_neg: - op[0].negate = !op[0].negate; - emit(MOV(result_dst, op[0])); - break; - case ir_unop_abs: - op[0].abs = true; - op[0].negate = false; - emit(MOV(result_dst, op[0])); - break; - - case ir_unop_sign: - if (ir->type->is_float()) { - /* AND(val, 0x80000000) gives the sign bit. - * - * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not - * zero. - */ - emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); - - op[0].type = BRW_REGISTER_TYPE_UD; - result_dst.type = BRW_REGISTER_TYPE_UD; - emit(AND(result_dst, op[0], src_reg(0x80000000u))); - - inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u))); - inst->predicate = BRW_PREDICATE_NORMAL; - - this->result.type = BRW_REGISTER_TYPE_F; - } else { - /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1). - * -> non-negative val generates 0x00000000. - * Predicated OR sets 1 if val is positive. 
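The floating-point sign() lowering above is pure bit manipulation; as a scalar model (illustration only):

   #include <cstdint>
   #include <cstring>

   static float sign_model(float x)
   {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      uint32_t r = bits & 0x80000000u;   // AND: keep only the sign bit
      if (x != 0.0f)                     // predicate from CMP ... NZ
         r |= 0x3f800000u;               // OR in the bits of 1.0f
      float out;
      std::memcpy(&out, &r, sizeof(out));
      return out;                        // +1.0f, -1.0f, or (+/-)0.0f
   }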
- */ - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G)); - - emit(ASR(result_dst, op[0], src_reg(31))); - - inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1))); - inst->predicate = BRW_PREDICATE_NORMAL; - } - break; - - case ir_unop_rcp: - emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); - break; - - case ir_unop_exp2: - emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); - break; - case ir_unop_log2: - emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); - break; - case ir_unop_exp: - case ir_unop_log: - unreachable("not reached: should be handled by ir_explog_to_explog2"); - case ir_unop_sin: - case ir_unop_sin_reduced: - emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); - break; - case ir_unop_cos: - case ir_unop_cos_reduced: - emit_math(SHADER_OPCODE_COS, result_dst, op[0]); - break; - - case ir_unop_dFdx: - case ir_unop_dFdx_coarse: - case ir_unop_dFdx_fine: - case ir_unop_dFdy: - case ir_unop_dFdy_coarse: - case ir_unop_dFdy_fine: - unreachable("derivatives not valid in vertex shader"); - - case ir_unop_bitfield_reverse: - emit(BFREV(result_dst, op[0])); - break; - case ir_unop_bit_count: - emit(CBIT(result_dst, op[0])); - break; - case ir_unop_find_msb: { - src_reg temp = src_reg(this, glsl_type::uint_type); - - inst = emit(FBH(dst_reg(temp), op[0])); - inst->dst.writemask = WRITEMASK_XYZW; - - /* FBH counts from the MSB side, while GLSL's findMSB() wants the count - * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then - * subtract the result from 31 to convert the MSB count into an LSB count. - */ - - /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */ - temp.swizzle = BRW_SWIZZLE_NOOP; - emit(MOV(result_dst, temp)); - - src_reg src_tmp = src_reg(result_dst); - emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ)); - - src_tmp.negate = true; - inst = emit(ADD(result_dst, src_tmp, src_reg(31))); - inst->predicate = BRW_PREDICATE_NORMAL; - break; - } - case ir_unop_find_lsb: - emit(FBL(result_dst, op[0])); - break; - case ir_unop_saturate: - inst = emit(MOV(result_dst, op[0])); - inst->saturate = true; - break; - - case ir_unop_noise: - unreachable("not reached: should be handled by lower_noise"); - - case ir_binop_add: - emit(ADD(result_dst, op[0], op[1])); - break; - case ir_binop_sub: - unreachable("not reached: should be handled by ir_sub_to_add_neg"); - - case ir_binop_mul: - if (brw->gen < 8 && ir->type->is_integer()) { - /* For integer multiplication, the MUL uses the low 16 bits of one of - * the operands (src0 through SNB, src1 on IVB and later). The MACH - * accumulates in the contribution of the upper 16 bits of that - * operand. If we can determine that one of the args is in the low - * 16 bits, though, we can just emit a single MUL. 
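The FBH fix-up in the ir_unop_find_msb case above converts an MSB-side count into the LSB-side count GLSL wants, while skipping the error value; as a scalar model:

   #include <cstdint>

   static int find_msb_model(uint32_t v)
   {
      int fbh = -1;                      // FBH's 0xFFFFFFFF error, read as D
      for (int i = 31; i >= 0; i--) {
         if (v & (1u << i)) {
            fbh = 31 - i;                // FBH counts from the MSB side
            break;
         }
      }
      // The predicated ADD fires only when FBH did not return the error
      // value, leaving -1 intact for an all-zero input.
      return (fbh == -1) ? -1 : 31 - fbh;
   }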
- */ - if (ir->operands[0]->is_uint16_constant()) { - if (brw->gen < 7) - emit(MUL(result_dst, op[0], op[1])); - else - emit(MUL(result_dst, op[1], op[0])); - } else if (ir->operands[1]->is_uint16_constant()) { - if (brw->gen < 7) - emit(MUL(result_dst, op[1], op[0])); - else - emit(MUL(result_dst, op[0], op[1])); - } else { - struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(dst_null_d(), op[0], op[1])); - emit(MOV(result_dst, src_reg(acc))); - } - } else { - emit(MUL(result_dst, op[0], op[1])); - } - break; - case ir_binop_imul_high: { - struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(result_dst, op[0], op[1])); - break; - } - case ir_binop_div: - /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ - assert(ir->type->is_integer()); - emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]); - break; - case ir_binop_carry: { - struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD); - - emit(ADDC(dst_null_ud(), op[0], op[1])); - emit(MOV(result_dst, src_reg(acc))); - break; - } - case ir_binop_borrow: { - struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD); - - emit(SUBB(dst_null_ud(), op[0], op[1])); - emit(MOV(result_dst, src_reg(acc))); - break; - } - case ir_binop_mod: - /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ - assert(ir->type->is_integer()); - emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]); - break; - - case ir_binop_less: - case ir_binop_greater: - case ir_binop_lequal: - case ir_binop_gequal: - case ir_binop_equal: - case ir_binop_nequal: { - emit(CMP(result_dst, op[0], op[1], - brw_conditional_for_comparison(ir->operation))); - if (ctx->Const.UniformBooleanTrue == 1) { - emit(AND(result_dst, result_src, src_reg(1))); - } - break; - } - - case ir_binop_all_equal: - /* "==" operator producing a scalar boolean. */ - if (ir->operands[0]->type->is_vector() || - ir->operands[1]->type->is_vector()) { - emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); - emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue))); - inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; - } else { - emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z)); - if (ctx->Const.UniformBooleanTrue == 1) { - emit(AND(result_dst, result_src, src_reg(1))); - } - } - break; - case ir_binop_any_nequal: - /* "!=" operator producing a scalar boolean. 
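The integer-multiply lowering above relies on MUL seeing only the low 16 bits of one operand, with MACH accumulating the contribution of the high 16 bits; the arithmetic identity behind the MUL/MACH/MOV-from-accumulator sequence, as scalar C++:

   #include <cstdint>

   static uint32_t imul_model(uint32_t a, uint32_t b)
   {
      uint32_t low_part  = a * (b & 0xffffu);       // what MUL computes
      uint32_t high_part = (a * (b >> 16)) << 16;   // what MACH accumulates
      // The accumulator ends up holding the full 32-bit product; when b
      // is a uint16 constant the high part is zero, which is why a
      // single MUL suffices in that case.
      return low_part + high_part;                  // == a * b (mod 2^32)
   }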
*/ - if (ir->operands[0]->type->is_vector() || - ir->operands[1]->type->is_vector()) { - emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); - - emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue))); - inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; - } else { - emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ)); - if (ctx->Const.UniformBooleanTrue == 1) { - emit(AND(result_dst, result_src, src_reg(1))); - } - } - break; - - case ir_unop_any: - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - emit(MOV(result_dst, src_reg(0))); - - inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue))); - inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; - break; - - case ir_binop_logic_xor: - emit(XOR(result_dst, op[0], op[1])); - break; - - case ir_binop_logic_or: - emit(OR(result_dst, op[0], op[1])); - break; - - case ir_binop_logic_and: - emit(AND(result_dst, op[0], op[1])); - break; - - case ir_binop_dot: - assert(ir->operands[0]->type->is_vector()); - assert(ir->operands[0]->type == ir->operands[1]->type); - emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); - break; - - case ir_unop_sqrt: - emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); - break; - case ir_unop_rsq: - emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); - break; - - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_u2f: - this->result = op[0]; - this->result.type = BRW_REGISTER_TYPE_F; - break; - - case ir_unop_bitcast_f2i: - this->result = op[0]; - this->result.type = BRW_REGISTER_TYPE_D; - break; - - case ir_unop_bitcast_f2u: - this->result = op[0]; - this->result.type = BRW_REGISTER_TYPE_UD; - break; - - case ir_unop_i2f: - case ir_unop_i2u: - case ir_unop_u2i: - case ir_unop_u2f: - case ir_unop_f2i: - case ir_unop_f2u: - emit(MOV(result_dst, op[0])); - break; - case ir_unop_b2i: - if (ctx->Const.UniformBooleanTrue != 1) { - emit(AND(result_dst, op[0], src_reg(1))); - } else { - emit(MOV(result_dst, op[0])); - } - break; - case ir_unop_b2f: - if (ctx->Const.UniformBooleanTrue != 1) { - op[0].type = BRW_REGISTER_TYPE_UD; - result_dst.type = BRW_REGISTER_TYPE_UD; - emit(AND(result_dst, op[0], src_reg(0x3f800000u))); - result_dst.type = BRW_REGISTER_TYPE_F; - } else { - emit(MOV(result_dst, op[0])); - } - break; - case ir_unop_f2b: - case ir_unop_i2b: - emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); - if (ctx->Const.UniformBooleanTrue == 1) { - emit(AND(result_dst, result_src, src_reg(1))); - } - break; - - case ir_unop_trunc: - emit(RNDZ(result_dst, op[0])); - break; - case ir_unop_ceil: - op[0].negate = !op[0].negate; - inst = emit(RNDD(result_dst, op[0])); - this->result.negate = true; - break; - case ir_unop_floor: - inst = emit(RNDD(result_dst, op[0])); - break; - case ir_unop_fract: - inst = emit(FRC(result_dst, op[0])); - break; - case ir_unop_round_even: - emit(RNDE(result_dst, op[0])); - break; - - case ir_binop_min: - emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]); - break; - case ir_binop_max: - emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]); - break; - - case ir_binop_pow: - emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); - break; - - case ir_unop_bit_not: - inst = emit(NOT(result_dst, op[0])); - break; - case ir_binop_bit_and: - inst = emit(AND(result_dst, op[0], op[1])); - break; - case ir_binop_bit_xor: - inst = emit(XOR(result_dst, op[0], op[1])); - break; - case ir_binop_bit_or: - inst = emit(OR(result_dst, op[0], op[1])); - break; - - case ir_binop_lshift: - 
inst = emit(SHL(result_dst, op[0], op[1])); - break; - - case ir_binop_rshift: - if (ir->type->base_type == GLSL_TYPE_INT) - inst = emit(ASR(result_dst, op[0], op[1])); - else - inst = emit(SHR(result_dst, op[0], op[1])); - break; - - case ir_binop_bfm: - emit(BFI1(result_dst, op[0], op[1])); - break; - - case ir_binop_ubo_load: { - ir_constant *const_uniform_block = ir->operands[0]->as_constant(); - ir_constant *const_offset_ir = ir->operands[1]->as_constant(); - unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0; - src_reg offset; - - /* Now, load the vector from that offset. */ - assert(ir->type->is_vector() || ir->type->is_scalar()); - - src_reg packed_consts = src_reg(this, glsl_type::vec4_type); - packed_consts.type = result.type; - src_reg surf_index; - - if (const_uniform_block) { - /* The block index is a constant, so just emit the binding table entry - * as an immediate. - */ - surf_index = src_reg(prog_data->base.binding_table.ubo_start + - const_uniform_block->value.u[0]); - } else { - /* The block index is not a constant. Evaluate the index expression - * per-channel and add the base UBO index; the generator will select - * a value from any live channel. - */ - surf_index = src_reg(this, glsl_type::uint_type); - emit(ADD(dst_reg(surf_index), op[0], - src_reg(prog_data->base.binding_table.ubo_start))); - - /* Assume this may touch any UBO. It would be nice to provide - * a tighter bound, but the array information is already lowered away. - */ - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ubo_start + - shader_prog->NumUniformBlocks - 1); - } - - if (const_offset_ir) { - if (brw->gen >= 8) { - /* Store the offset in a GRF so we can send-from-GRF. */ - offset = src_reg(this, glsl_type::int_type); - emit(MOV(dst_reg(offset), src_reg(const_offset / 16))); - } else { - /* Immediates are fine on older generations since they'll be moved - * to a (potentially fake) MRF at the generator level. - */ - offset = src_reg(const_offset / 16); - } - } else { - offset = src_reg(this, glsl_type::uint_type); - emit(SHR(dst_reg(offset), op[1], src_reg(4))); - } - - if (brw->gen >= 7) { - dst_reg grf_offset = dst_reg(this, glsl_type::int_type); - grf_offset.type = offset.type; - - emit(MOV(grf_offset, offset)); - - emit(new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, - dst_reg(packed_consts), - surf_index, - src_reg(grf_offset))); - } else { - vec4_instruction *pull = - emit(new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD, - dst_reg(packed_consts), - surf_index, - offset)); - pull->base_mrf = 14; - pull->mlen = 1; - } - - packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements); - packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4); - - /* UBO bools are any nonzero int. We need to convert them to use the - * value of true stored in ctx->Const.UniformBooleanTrue. 
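The swizzle arithmetic at the end of the ir_binop_ubo_load case selects a component within the aligned vec4 the pull load fetched; as a helper-style model (hypothetical function, for illustration):

   static unsigned ubo_start_component(unsigned off_bytes)
   {
      // The load fetches a 16-byte-aligned vec4, so a value at byte
      // offset off_bytes starts at this component of the fetched
      // register, which the replicated swizzle then steers to every
      // channel:
      return (off_bytes % 16) / 4;   // 0 -> .x, 4 -> .y, 8 -> .z, 12 -> .w
   }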
- */ - if (ir->type->base_type == GLSL_TYPE_BOOL) { - emit(CMP(result_dst, packed_consts, src_reg(0u), - BRW_CONDITIONAL_NZ)); - if (ctx->Const.UniformBooleanTrue == 1) { - emit(AND(result_dst, result, src_reg(1))); - } - } else { - emit(MOV(result_dst, packed_consts)); - } - break; - } - - case ir_binop_vector_extract: - unreachable("should have been lowered by vec_index_to_cond_assign"); - - case ir_triop_fma: - op[0] = fix_3src_operand(op[0]); - op[1] = fix_3src_operand(op[1]); - op[2] = fix_3src_operand(op[2]); - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. - */ - emit(MAD(result_dst, op[2], op[1], op[0])); - break; - - case ir_triop_lrp: - emit_lrp(result_dst, op[0], op[1], op[2]); - break; - - case ir_triop_csel: - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - inst = emit(BRW_OPCODE_SEL, result_dst, op[1], op[2]); - inst->predicate = BRW_PREDICATE_NORMAL; - break; - - case ir_triop_bfi: - op[0] = fix_3src_operand(op[0]); - op[1] = fix_3src_operand(op[1]); - op[2] = fix_3src_operand(op[2]); - emit(BFI2(result_dst, op[0], op[1], op[2])); - break; - - case ir_triop_bitfield_extract: - op[0] = fix_3src_operand(op[0]); - op[1] = fix_3src_operand(op[1]); - op[2] = fix_3src_operand(op[2]); - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. - */ - emit(BFE(result_dst, op[2], op[1], op[0])); - break; - - case ir_triop_vector_insert: - unreachable("should have been lowered by lower_vector_insert"); - - case ir_quadop_bitfield_insert: - unreachable("not reached: should be handled by " - "bitfield_insert_to_bfm_bfi\n"); - - case ir_quadop_vector: - unreachable("not reached: should be handled by lower_quadop_vector"); - - case ir_unop_pack_half_2x16: - emit_pack_half_2x16(result_dst, op[0]); - break; - case ir_unop_unpack_half_2x16: - emit_unpack_half_2x16(result_dst, op[0]); - break; - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_snorm_4x8: - case ir_unop_pack_unorm_2x16: - case ir_unop_pack_unorm_4x8: - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_snorm_4x8: - case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_unorm_4x8: - unreachable("not reached: should be handled by lower_packing_builtins"); - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: - case ir_binop_pack_half_2x16_split: - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_sample: - case ir_binop_interpolate_at_offset: - unreachable("not reached: should not occur in vertex shader"); - case ir_binop_ldexp: - unreachable("not reached: should be handled by ldexp_to_arith()"); - } -} - - -void -vec4_visitor::visit(ir_swizzle *ir) -{ - src_reg src; - int i = 0; - int swizzle[4]; - - /* Note that this is only swizzles in expressions, not those on the left - * hand side of an assignment, which do write masking. See ir_assignment - * for that. - */ - - ir->val->accept(this); - src = this->result; - assert(src.file != BAD_FILE); - - for (i = 0; i < ir->type->vector_elements; i++) { - switch (i) { - case 0: - swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); - break; - case 1: - swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); - break; - case 2: - swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); - break; - case 3: - swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); - break; - } - } - for (; i < 4; i++) { - /* Replicate the last channel out. 
*/ - swizzle[i] = swizzle[ir->type->vector_elements - 1]; - } - - src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - - this->result = src; -} - -void -vec4_visitor::visit(ir_dereference_variable *ir) -{ - const struct glsl_type *type = ir->type; - dst_reg *reg = variable_storage(ir->var); - - if (!reg) { - fail("Failed to find variable storage for %s\n", ir->var->name); - this->result = src_reg(brw_null_reg()); - return; - } - - this->result = src_reg(*reg); - - /* System values get their swizzle from the dst_reg writemask */ - if (ir->var->data.mode == ir_var_system_value) - return; - - if (type->is_scalar() || type->is_vector() || type->is_matrix()) - this->result.swizzle = swizzle_for_size(type->vector_elements); -} - - -int -vec4_visitor::compute_array_stride(ir_dereference_array *ir) -{ - /* Under normal circumstances array elements are stored consecutively, so - * the stride is equal to the size of the array element. - */ - return type_size(ir->type); -} - - -void -vec4_visitor::visit(ir_dereference_array *ir) -{ - ir_constant *constant_index; - src_reg src; - int array_stride = compute_array_stride(ir); - - constant_index = ir->array_index->constant_expression_value(); - - ir->array->accept(this); - src = this->result; - - if (constant_index) { - src.reg_offset += constant_index->value.i[0] * array_stride; - } else { - /* Variable index array dereference. It eats the "vec4" of the - * base of the array and an index that offsets the Mesa register - * index. - */ - ir->array_index->accept(this); - - src_reg index_reg; - - if (array_stride == 1) { - index_reg = this->result; - } else { - index_reg = src_reg(this, glsl_type::int_type); - - emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride))); - } - - if (src.reladdr) { - src_reg temp = src_reg(this, glsl_type::int_type); - - emit(ADD(dst_reg(temp), *src.reladdr, index_reg)); - - index_reg = temp; - } - - src.reladdr = ralloc(mem_ctx, src_reg); - memcpy(src.reladdr, &index_reg, sizeof(index_reg)); - } - - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix()) - src.swizzle = swizzle_for_size(ir->type->vector_elements); - else - src.swizzle = BRW_SWIZZLE_NOOP; - src.type = brw_type_for_base_type(ir->type); - - this->result = src; -} - -void -vec4_visitor::visit(ir_dereference_record *ir) -{ - unsigned int i; - const glsl_type *struct_type = ir->record->type; - int offset = 0; - - ir->record->accept(this); - - for (i = 0; i < struct_type->length; i++) { - if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) - break; - offset += type_size(struct_type->fields.structure[i].type); - } - - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix()) - this->result.swizzle = swizzle_for_size(ir->type->vector_elements); - else - this->result.swizzle = BRW_SWIZZLE_NOOP; - this->result.type = brw_type_for_base_type(ir->type); - - this->result.reg_offset += offset; -} - -/** - * We want to be careful in assignment setup to hit the actual storage - * instead of potentially using a temporary like we might with the - * ir_dereference handler. - */ -static dst_reg -get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) -{ - /* The LHS must be a dereference. 
If the LHS is a variable indexed array - * access of a vector, it must be separated into a series of conditional - * moves before reaching this point (see ir_vec_index_to_cond_assign). - */ - assert(ir->as_dereference()); - ir_dereference_array *deref_array = ir->as_dereference_array(); - if (deref_array) { - assert(!deref_array->array->type->is_vector()); - } - - /* Use the rvalue deref handler for the most part. We'll ignore - * swizzles in it and write swizzles using writemask, though. - */ - ir->accept(v); - return dst_reg(v->result); -} - -void -vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, - const struct glsl_type *type, - enum brw_predicate predicate) -{ - if (type->base_type == GLSL_TYPE_STRUCT) { - for (unsigned int i = 0; i < type->length; i++) { - emit_block_move(dst, src, type->fields.structure[i].type, predicate); - } - return; - } - - if (type->is_array()) { - for (unsigned int i = 0; i < type->length; i++) { - emit_block_move(dst, src, type->fields.array, predicate); - } - return; - } - - if (type->is_matrix()) { - const struct glsl_type *vec_type; - - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, - type->vector_elements, 1); - - for (int i = 0; i < type->matrix_columns; i++) { - emit_block_move(dst, src, vec_type, predicate); - } - return; - } - - assert(type->is_scalar() || type->is_vector()); - - dst->type = brw_type_for_base_type(type); - src->type = dst->type; - - dst->writemask = (1 << type->vector_elements) - 1; - - src->swizzle = swizzle_for_size(type->vector_elements); - - vec4_instruction *inst = emit(MOV(*dst, *src)); - inst->predicate = predicate; - - dst->reg_offset++; - src->reg_offset++; -} - - -/* If the RHS processing resulted in an instruction generating a - * temporary value, and it would be easy to rewrite the instruction to - * generate its result right into the LHS instead, do so. This ends - * up reliably removing instructions where it can be tricky to do so - * later without real UD chain information. - */ -bool -vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, - dst_reg dst, - src_reg src, - vec4_instruction *pre_rhs_inst, - vec4_instruction *last_rhs_inst) -{ - /* This could be supported, but it would take more smarts. */ - if (ir->condition) - return false; - - if (pre_rhs_inst == last_rhs_inst) - return false; /* No instructions generated to work with. */ - - /* Make sure the last instruction generated our source reg. */ - if (src.file != GRF || - src.file != last_rhs_inst->dst.file || - src.reg != last_rhs_inst->dst.reg || - src.reg_offset != last_rhs_inst->dst.reg_offset || - src.reladdr || - src.abs || - src.negate || - last_rhs_inst->predicate != BRW_PREDICATE_NONE) - return false; - - /* Check that that last instruction fully initialized the channels - * we want to use, in the order we want to use them. We could - * potentially reswizzle the operands of many instructions so that - * we could handle out of order channels, but don't yet. - */ - - for (unsigned i = 0; i < 4; i++) { - if (dst.writemask & (1 << i)) { - if (!(last_rhs_inst->dst.writemask & (1 << i))) - return false; - - if (BRW_GET_SWZ(src.swizzle, i) != i) - return false; - } - } - - /* Success! Rewrite the instruction.
*/ - last_rhs_inst->dst.file = dst.file; - last_rhs_inst->dst.reg = dst.reg; - last_rhs_inst->dst.reg_offset = dst.reg_offset; - last_rhs_inst->dst.reladdr = dst.reladdr; - last_rhs_inst->dst.writemask &= dst.writemask; - - return true; -} - -void -vec4_visitor::visit(ir_assignment *ir) -{ - dst_reg dst = get_assignment_lhs(ir->lhs, this); - enum brw_predicate predicate = BRW_PREDICATE_NONE; - - if (!ir->lhs->type->is_scalar() && - !ir->lhs->type->is_vector()) { - ir->rhs->accept(this); - src_reg src = this->result; - - if (ir->condition) { - emit_bool_to_cond_code(ir->condition, &predicate); - } - - /* emit_block_move doesn't account for swizzles in the source register. - * This should be ok, since the source register is a structure or an - * array, and those can't be swizzled. But double-check to be sure. - */ - assert(src.swizzle == - (ir->rhs->type->is_matrix() - ? swizzle_for_size(ir->rhs->type->vector_elements) - : BRW_SWIZZLE_NOOP)); - - emit_block_move(&dst, &src, ir->rhs->type, predicate); - return; - } - - /* Now we're down to just a scalar/vector with writemasks. */ - int i; - - vec4_instruction *pre_rhs_inst, *last_rhs_inst; - pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); - - ir->rhs->accept(this); - - last_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); - - src_reg src = this->result; - - int swizzles[4]; - int first_enabled_chan = 0; - int src_chan = 0; - - assert(ir->lhs->type->is_vector() || - ir->lhs->type->is_scalar()); - dst.writemask = ir->write_mask; - - for (int i = 0; i < 4; i++) { - if (dst.writemask & (1 << i)) { - first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); - break; - } - } - - /* Swizzle a small RHS vector into the channels being written. - * - * glsl ir treats write_mask as dictating how many channels are - * present on the RHS while in our instructions we need to make - * those channels appear in the slots of the vec4 they're written to. 
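The rewrite above turns "add tmp, a, b; mov dst.xy, tmp.xy" into "add dst.xy, a, b". Its channel check, restated as a stand-alone predicate (illustrative types only): every enabled destination channel must have been written by the RHS instruction, in natural order.

   static bool channels_line_up(unsigned dst_writemask,
                                unsigned rhs_dst_writemask,
                                const int swizzle[4])
   {
      for (unsigned i = 0; i < 4; i++) {
         if (dst_writemask & (1u << i)) {
            if (!(rhs_dst_writemask & (1u << i)))
               return false;            // channel was never initialized
            if (swizzle[i] != (int)i)
               return false;            // out-of-order channel use
         }
      }
      return true;
   }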
- */ - for (int i = 0; i < 4; i++) { - if (dst.writemask & (1 << i)) - swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); - else - swizzles[i] = first_enabled_chan; - } - src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], - swizzles[2], swizzles[3]); - - if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) { - return; - } - - if (ir->condition) { - emit_bool_to_cond_code(ir->condition, &predicate); - } - - for (i = 0; i < type_size(ir->lhs->type); i++) { - vec4_instruction *inst = emit(MOV(dst, src)); - inst->predicate = predicate; - - dst.reg_offset++; - src.reg_offset++; - } -} - -void -vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) -{ - if (ir->type->base_type == GLSL_TYPE_STRUCT) { - foreach_in_list(ir_constant, field_value, &ir->components) { - emit_constant_values(dst, field_value); - } - return; - } - - if (ir->type->is_array()) { - for (unsigned int i = 0; i < ir->type->length; i++) { - emit_constant_values(dst, ir->array_elements[i]); - } - return; - } - - if (ir->type->is_matrix()) { - for (int i = 0; i < ir->type->matrix_columns; i++) { - float *vec = &ir->value.f[i * ir->type->vector_elements]; - - for (int j = 0; j < ir->type->vector_elements; j++) { - dst->writemask = 1 << j; - dst->type = BRW_REGISTER_TYPE_F; - - emit(MOV(*dst, src_reg(vec[j]))); - } - dst->reg_offset++; - } - return; - } - - int remaining_writemask = (1 << ir->type->vector_elements) - 1; - - for (int i = 0; i < ir->type->vector_elements; i++) { - if (!(remaining_writemask & (1 << i))) - continue; - - dst->writemask = 1 << i; - dst->type = brw_type_for_base_type(ir->type); - - /* Find other components that match the one we're about to - * write. Emits fewer instructions for things like vec4(0.5, - * 1.5, 1.5, 1.5). - */ - for (int j = i + 1; j < ir->type->vector_elements; j++) { - if (ir->type->base_type == GLSL_TYPE_BOOL) { - if (ir->value.b[i] == ir->value.b[j]) - dst->writemask |= (1 << j); - } else { - /* u, i, and f storage all line up, so no need for a - * switch case for comparing each type. - */ - if (ir->value.u[i] == ir->value.u[j]) - dst->writemask |= (1 << j); - } - } - - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(MOV(*dst, src_reg(ir->value.f[i]))); - break; - case GLSL_TYPE_INT: - emit(MOV(*dst, src_reg(ir->value.i[i]))); - break; - case GLSL_TYPE_UINT: - emit(MOV(*dst, src_reg(ir->value.u[i]))); - break; - case GLSL_TYPE_BOOL: - emit(MOV(*dst, - src_reg(ir->value.b[i] != 0 ? 
ctx->Const.UniformBooleanTrue - : 0))); - break; - default: - unreachable("Non-float/uint/int/bool constant"); - } - - remaining_writemask &= ~dst->writemask; - } - dst->reg_offset++; -} - -void -vec4_visitor::visit(ir_constant *ir) -{ - dst_reg dst = dst_reg(this, ir->type); - this->result = src_reg(dst); - - emit_constant_values(&dst, ir); -} - -void -vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir) -{ - ir_dereference *deref = static_cast<ir_dereference *>( - ir->actual_parameters.get_head()); - ir_variable *location = deref->variable_referenced(); - unsigned surf_index = (prog_data->base.binding_table.abo_start + - location->data.binding); - - /* Calculate the surface offset */ - src_reg offset(this, glsl_type::uint_type); - ir_dereference_array *deref_array = deref->as_dereference_array(); - if (deref_array) { - deref_array->array_index->accept(this); - - src_reg tmp(this, glsl_type::uint_type); - emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE)); - emit(ADD(dst_reg(offset), tmp, location->data.atomic.offset)); - } else { - offset = location->data.atomic.offset; - } - - /* Emit the appropriate machine instruction */ - const char *callee = ir->callee->function_name(); - dst_reg dst = get_assignment_lhs(ir->return_deref, this); - - if (!strcmp("__intrinsic_atomic_read", callee)) { - emit_untyped_surface_read(surf_index, dst, offset); - - } else if (!strcmp("__intrinsic_atomic_increment", callee)) { - emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset, - src_reg(), src_reg()); - - } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { - emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset, - src_reg(), src_reg()); - } -} - -void -vec4_visitor::visit(ir_call *ir) -{ - const char *callee = ir->callee->function_name(); - - if (!strcmp("__intrinsic_atomic_read", callee) || - !strcmp("__intrinsic_atomic_increment", callee) || - !strcmp("__intrinsic_atomic_predecrement", callee)) { - visit_atomic_counter_intrinsic(ir); - } else { - unreachable("Unsupported intrinsic."); - } -} - -src_reg -vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler) -{ - vec4_instruction *inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS); - inst->base_mrf = 2; - inst->mlen = 1; - inst->dst = dst_reg(this, glsl_type::uvec4_type); - inst->dst.writemask = WRITEMASK_XYZW; - - inst->src[1] = sampler; - - /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */ - int param_base = inst->base_mrf; - int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1; - int zero_mask = 0xf & ~coord_mask; - - emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask), - coordinate)); - - emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask), - src_reg(0))); - - emit(inst); - return src_reg(inst->dst); -} - -static bool -is_high_sampler(struct brw_context *brw, src_reg sampler) -{ - if (brw->gen < 8 && !brw->is_haswell) - return false; - - return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16; -} - -void -vec4_visitor::visit(ir_texture *ir) -{ - uint32_t sampler = - _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog); - - ir_rvalue *nonconst_sampler_index = - _mesa_get_sampler_array_nonconst_index(ir->sampler); - - /* Handle non-constant sampler array indexing */ - src_reg sampler_reg; - if (nonconst_sampler_index) { - /* The highest sampler which may be used by this operation is - * the last element of the array. 
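The bound computed just below reduces to one line of arithmetic; a sketch with hypothetical parameter names, where table_start stands for whichever of texture_start or gather_texture_start applies:

   /* Highest surface a non-constant sampler[i] access may touch: the last
    * element of the array, offset by where the relevant bindings start. */
   static unsigned
   max_sampler_binding(unsigned base_sampler, unsigned array_size,
                       unsigned table_start)
   {
      return table_start + base_sampler + array_size - 1;
   }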
Mark it here, because the generator - * doesn't have enough information to determine the bound. - */ - uint32_t array_size = ir->sampler->as_dereference_array() - ->array->type->array_size(); - - uint32_t max_used = sampler + array_size - 1; - if (ir->op == ir_tg4 && brw->gen < 8) { - max_used += prog_data->base.binding_table.gather_texture_start; - } else { - max_used += prog_data->base.binding_table.texture_start; - } - - brw_mark_surface_used(&prog_data->base, max_used); - - /* Emit code to evaluate the actual indexing expression */ - nonconst_sampler_index->accept(this); - dst_reg temp(this, glsl_type::uint_type); - emit(ADD(temp, this->result, src_reg(sampler))) - ->force_writemask_all = true; - sampler_reg = src_reg(temp); - } else { - /* Single sampler, or constant array index; the indexing expression - * is just an immediate. - */ - sampler_reg = src_reg(sampler); - } - - /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother - * emitting anything other than setting up the constant result. - */ - if (ir->op == ir_tg4) { - ir_constant *chan = ir->lod_info.component->as_constant(); - int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]); - if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { - dst_reg result(this, ir->type); - this->result = src_reg(result); - emit(MOV(result, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f))); - return; - } - } - - /* Should be lowered by do_lower_texture_projection */ - assert(!ir->projector); - - /* Should be lowered */ - assert(!ir->offset || !ir->offset->type->is_array()); - - /* Generate code to compute all the subexpression trees. This has to be - * done before loading any values into MRFs for the sampler message since - * generating these values may involve SEND messages that need the MRFs. 
- */ - src_reg coordinate; - if (ir->coordinate) { - ir->coordinate->accept(this); - coordinate = this->result; - } - - src_reg shadow_comparitor; - if (ir->shadow_comparitor) { - ir->shadow_comparitor->accept(this); - shadow_comparitor = this->result; - } - - bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant(); - src_reg offset_value; - if (has_nonconstant_offset) { - ir->offset->accept(this); - offset_value = src_reg(this->result); - } - - const glsl_type *lod_type = NULL, *sample_index_type = NULL; - src_reg lod, dPdx, dPdy, sample_index, mcs; - switch (ir->op) { - case ir_tex: - lod = src_reg(0.0f); - lod_type = glsl_type::float_type; - break; - case ir_txf: - case ir_txl: - case ir_txs: - ir->lod_info.lod->accept(this); - lod = this->result; - lod_type = ir->lod_info.lod->type; - break; - case ir_query_levels: - lod = src_reg(0); - lod_type = glsl_type::int_type; - break; - case ir_txf_ms: - ir->lod_info.sample_index->accept(this); - sample_index = this->result; - sample_index_type = ir->lod_info.sample_index->type; - - if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler)) - mcs = emit_mcs_fetch(ir, coordinate, sampler_reg); - else - mcs = src_reg(0u); - break; - case ir_txd: - ir->lod_info.grad.dPdx->accept(this); - dPdx = this->result; - - ir->lod_info.grad.dPdy->accept(this); - dPdy = this->result; - - lod_type = ir->lod_info.grad.dPdx->type; - break; - case ir_txb: - case ir_lod: - case ir_tg4: - break; - } - - enum opcode opcode; - switch (ir->op) { - case ir_tex: opcode = SHADER_OPCODE_TXL; break; - case ir_txl: opcode = SHADER_OPCODE_TXL; break; - case ir_txd: opcode = SHADER_OPCODE_TXD; break; - case ir_txf: opcode = SHADER_OPCODE_TXF; break; - case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break; - case ir_txs: opcode = SHADER_OPCODE_TXS; break; - case ir_tg4: opcode = has_nonconstant_offset - ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break; - case ir_query_levels: opcode = SHADER_OPCODE_TXS; break; - case ir_txb: - unreachable("TXB is not valid for vertex shaders."); - case ir_lod: - unreachable("LOD is not valid for vertex shaders."); - default: - unreachable("Unrecognized tex op"); - } - - vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode); - - if (ir->offset != NULL && ir->op != ir_txf) - inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant()); - - /* Stuff the channel select bits in the top of the texture offset */ - if (ir->op == ir_tg4) - inst->texture_offset |= gather_channel(ir, sampler) << 16; - - /* The message header is necessary for: - * - Gen4 (always) - * - Texel offsets - * - Gather channel selection - * - Sampler indices too large to fit in a 4-bit value. - */ - inst->header_present = - brw->gen < 5 || inst->texture_offset != 0 || ir->op == ir_tg4 || - is_high_sampler(brw, sampler_reg); - inst->base_mrf = 2; - inst->mlen = inst->header_present + 1; /* always at least one */ - inst->dst = dst_reg(this, ir->type); - inst->dst.writemask = WRITEMASK_XYZW; - inst->shadow_compare = ir->shadow_comparitor != NULL; - - inst->src[1] = sampler_reg; - - /* MRF for the first parameter */ - int param_base = inst->base_mrf + inst->header_present; - if (ir->op == ir_txs || ir->op == ir_query_levels) { - int writemask = brw->gen == 4 ? 
WRITEMASK_W : WRITEMASK_X; - emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod)); } else { - /* Load the coordinate */ - /* FINISHME: gl_clamp_mask and saturate */ - int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1; - int zero_mask = 0xf & ~coord_mask; - - emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask), - coordinate)); - - if (zero_mask != 0) { - emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask), - src_reg(0))); - } - /* Load the shadow comparitor */ - if (ir->shadow_comparitor && ir->op != ir_txd && (ir->op != ir_tg4 || !has_nonconstant_offset)) { - emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type, - WRITEMASK_X), - shadow_comparitor)); - inst->mlen++; - } - - /* Load the LOD info */ - if (ir->op == ir_tex || ir->op == ir_txl) { - int mrf, writemask; - if (brw->gen >= 5) { - mrf = param_base + 1; - if (ir->shadow_comparitor) { - writemask = WRITEMASK_Y; - /* mlen already incremented */ - } else { - writemask = WRITEMASK_X; - inst->mlen++; - } - } else /* brw->gen == 4 */ { - mrf = param_base; - writemask = WRITEMASK_W; - } - emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod)); - } else if (ir->op == ir_txf) { - emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod)); - } else if (ir->op == ir_txf_ms) { - emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X), - sample_index)); - if (brw->gen >= 7) { - /* MCS data is in the first channel of `mcs`, but we need to get it into - * the .y channel of the second vec4 of params, so replicate .x across - * the whole vec4 and then mask off everything except .y - */ - mcs.swizzle = BRW_SWIZZLE_XXXX; - emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y), - mcs)); - } - inst->mlen++; - } else if (ir->op == ir_txd) { - const glsl_type *type = lod_type; - - if (brw->gen >= 5) { - dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y); - dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y); - emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx)); - emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy)); - inst->mlen++; - - if (ir->type->vector_elements == 3 || ir->shadow_comparitor) { - dPdx.swizzle = BRW_SWIZZLE_ZZZZ; - dPdy.swizzle = BRW_SWIZZLE_ZZZZ; - emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx)); - emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy)); - inst->mlen++; - - if (ir->shadow_comparitor) { - emit(MOV(dst_reg(MRF, param_base + 2, - ir->shadow_comparitor->type, WRITEMASK_Z), - shadow_comparitor)); - } - } - } else /* brw->gen == 4 */ { - emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx)); - emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy)); - inst->mlen += 2; - } - } else if (ir->op == ir_tg4 && has_nonconstant_offset) { - if (ir->shadow_comparitor) { - emit(MOV(dst_reg(MRF, param_base, ir->shadow_comparitor->type, WRITEMASK_W), - shadow_comparitor)); - } - - emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY), - offset_value)); - inst->mlen++; - } - } - - emit(inst); - - /* fixup num layers (z) for cube arrays: hardware returns faces * layers; - * spec requires layers. 
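The INT_QUOTIENT fixup emitted just below is plain integer division; checked on the CPU under the assumption that resinfo returned faces * layers:

   #include <cassert>

   int main()
   {
      const int faces_times_layers = 42; /* resinfo .z: 6 faces * 7 layers */
      const int layers = faces_times_layers / 6;
      assert(layers == 7); /* the value GL expects from textureSize() */
   }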
- */ - if (ir->op == ir_txs) { - glsl_type const *type = ir->sampler->type; - if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && - type->sampler_array) { - emit_math(SHADER_OPCODE_INT_QUOTIENT, - writemask(inst->dst, WRITEMASK_Z), - src_reg(inst->dst), src_reg(6)); - } - } - - if (brw->gen == 6 && ir->op == ir_tg4) { - emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst); - } - - swizzle_result(ir, src_reg(inst->dst), sampler); -} - -/** - * Apply workarounds for Gen6 gather with UINT/SINT - */ -void -vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst) -{ - if (!wa) + backend_visitor::visit(ir); return; - - int width = (wa & WA_8BIT) ? 8 : 16; - dst_reg dst_f = dst; - dst_f.type = BRW_REGISTER_TYPE_F; - - /* Convert from UNORM to UINT */ - emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1)))); - emit(MOV(dst, src_reg(dst_f))); - - if (wa & WA_SIGN) { - /* Reinterpret the UINT value as a signed INT value by - * shifting the sign bit into place, then shifting back - * preserving sign. - */ - emit(SHL(dst, src_reg(dst), src_reg(32 - width))); - emit(ASR(dst, src_reg(dst), src_reg(32 - width))); } -} -/** - * Set up the gather channel based on the swizzle, for gather4. - */ -uint32_t -vec4_visitor::gather_channel(ir_texture *ir, uint32_t sampler) -{ - ir_constant *chan = ir->lod_info.component->as_constant(); - int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]); - switch (swiz) { - case SWIZZLE_X: return 0; - case SWIZZLE_Y: - /* gather4 sampler is broken for green channel on RG32F -- - * we must ask for blue instead. - */ - if (key->tex.gather_channel_quirk_mask & (1<<sampler)) - return 2; - return 1; - case SWIZZLE_Z: return 2; - case SWIZZLE_W: return 3; - default: - unreachable("Not reached"); /* zero, one swizzles handled already */ - } -} - -void -vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler) -{ - int s = key->tex.swizzles[sampler]; - - this->result = src_reg(this, ir->type); - dst_reg swizzled_result(this->result); - - if (ir->op == ir_query_levels) { - /* # levels is in .w */ - orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); - emit(MOV(swizzled_result, orig_val)); - return; - } - - if (ir->op == ir_txs || ir->type == glsl_type::float_type - || s == SWIZZLE_NOOP || ir->op == ir_tg4) { - emit(MOV(swizzled_result, orig_val)); - return; - } - - - int zero_mask = 0, one_mask = 0, copy_mask = 0; - int swizzle[4] = {0}; - - for (int i = 0; i < 4; i++) { - switch (GET_SWZ(s, i)) { - case SWIZZLE_ZERO: - zero_mask |= (1 << i); - break; - case SWIZZLE_ONE: - one_mask |= (1 << i); - break; - default: - copy_mask |= (1 << i); - swizzle[i] = GET_SWZ(s, i); - break; - } - } - - if (copy_mask) { - orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - swizzled_result.writemask = copy_mask; - emit(MOV(swizzled_result, orig_val)); - } - - if (zero_mask) { - swizzled_result.writemask = zero_mask; - emit(MOV(swizzled_result, src_reg(0.0f))); - } - - if (one_mask) { - swizzled_result.writemask = one_mask; - emit(MOV(swizzled_result, src_reg(1.0f))); - } -} - -void -vec4_visitor::visit(ir_return *) -{ - unreachable("not reached"); + reg->type = brw_type_for_base_type(ir->type); + hash_table_insert(this->variable_ht, reg, ir); } void @@ -2801,35 +350,6 @@ vec4_visitor::visit(ir_discard *) } void -vec4_visitor::visit(ir_if *ir) -{ - /* Don't point the annotation at the if statement, because then it plus - * the then and else blocks get printed. 
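The arithmetic in the removed emit_gen6_gather_wa() above can be verified on the CPU; a sketch for the 8-bit signed case (the +0.5 only guards CPU float rounding, the GPU path relies on the MOV's float-to-int conversion):

   #include <cassert>
   #include <cstdint>

   int main()
   {
      const int width = 8;                 /* the WA_8BIT case */
      const float unorm = 200.0f / 255.0f; /* what the sampler handed back */

      /* UNORM -> UINT: scale by (1 << width) - 1, as the MUL does. */
      const uint32_t u = (uint32_t)(unorm * ((1u << width) - 1) + 0.5f);

      /* WA_SIGN: shift the sign bit into place, then back (the SHL/ASR pair). */
      const int32_t s = (int32_t)(u << (32 - width)) >> (32 - width);

      assert(u == 200 && s == -56); /* 200 reinterpreted as int8 is -56 */
   }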
- */ - this->base_ir = ir->condition; - - if (brw->gen == 6) { - emit_if_gen6(ir); - } else { - enum brw_predicate predicate; - emit_bool_to_cond_code(ir->condition, &predicate); - emit(IF(predicate)); - } - - visit_instructions(&ir->then_instructions); - - if (!ir->else_instructions.is_empty()) { - this->base_ir = ir->condition; - emit(BRW_OPCODE_ELSE); - - visit_instructions(&ir->else_instructions); - } - - this->base_ir = ir->condition; - emit(BRW_OPCODE_ENDIF); -} - -void vec4_visitor::visit(ir_emit_vertex *) { unreachable("not reached"); @@ -2842,55 +362,6 @@ vec4_visitor::visit(ir_end_primitive *) } void -vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, - dst_reg dst, src_reg offset, - src_reg src0, src_reg src1) -{ - unsigned mlen = 0; - - /* Set the atomic operation offset. */ - emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset)); - mlen++; - - /* Set the atomic operation arguments. */ - if (src0.file != BAD_FILE) { - emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0)); - mlen++; - } - - if (src1.file != BAD_FILE) { - emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1)); - mlen++; - } - - /* Emit the instruction. Note that this maps to the normal SIMD8 - * untyped atomic message on Ivy Bridge, but that's OK because - * unused channels will be masked out. - */ - vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, - src_reg(atomic_op), src_reg(surf_index)); - inst->base_mrf = 0; - inst->mlen = mlen; -} - -void -vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst, - src_reg offset) -{ - /* Set the surface read offset. */ - emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset)); - - /* Emit the instruction. Note that this maps to the normal SIMD8 - * untyped surface read message, but that's OK because unused - * channels will be masked out. 
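Payload sizing in the removed emit_untyped_atomic() above follows one rule; as a standalone sketch (untyped_atomic_mlen is a hypothetical helper):

   /* One MRF for the offset, plus one per operand actually present;
    * BRW_AOP_INC takes none, a compare-and-swap style op takes two. */
   static unsigned
   untyped_atomic_mlen(bool has_src0, bool has_src1)
   {
      return 1 + (has_src0 ? 1 : 0) + (has_src1 ? 1 : 0);
   }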
- */ - vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, - dst, src_reg(surf_index)); - inst->base_mrf = 0; - inst->mlen = 1; -} - -void vec4_visitor::emit_ndc_computation() { /* Get the position */ @@ -2900,17 +371,17 @@ vec4_visitor::emit_ndc_computation() dst_reg ndc = dst_reg(this, glsl_type::vec4_type); output_reg[BRW_VARYING_SLOT_NDC] = ndc; - current_annotation = "NDC"; + bld.set_annotation("NDC"); dst_reg ndc_w = ndc; ndc_w.writemask = WRITEMASK_W; src_reg pos_w = pos; pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); - emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); + bld.emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); dst_reg ndc_xyz = ndc; ndc_xyz.writemask = WRITEMASK_XYZ; - emit(MUL(ndc_xyz, pos, src_reg(ndc_w))); + bld.MUL(ndc_xyz, pos, src_reg(ndc_w)); } void @@ -2923,29 +394,29 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) dst_reg header1_w = header1; header1_w.writemask = WRITEMASK_W; - emit(MOV(header1, 0u)); + bld.MOV(header1, 0u); if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]); - current_annotation = "Point size"; - emit(MUL(header1_w, psiz, src_reg((float)(1 << 11)))); - emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8)); + bld.set_annotation("Point size"); + bld.MUL(header1_w, psiz, src_reg((float)(1 << 11))); + bld.AND(header1_w, src_reg(header1_w), 0x7ff << 8); } if (key->userclip_active) { - current_annotation = "Clipping flags"; + bld.set_annotation("Clipping flags"); dst_reg flags0 = dst_reg(this, glsl_type::uint_type); dst_reg flags1 = dst_reg(this, glsl_type::uint_type); - emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L)); - emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0)); - emit(OR(header1_w, src_reg(header1_w), src_reg(flags0))); + bld.CMP(bld.reg_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L); + bld.emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0)); + bld.OR(header1_w, src_reg(header1_w), src_reg(flags0)); - emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L)); - emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0)); - emit(SHL(flags1, src_reg(flags1), src_reg(4))); - emit(OR(header1_w, src_reg(header1_w), src_reg(flags1))); + bld.CMP(bld.reg_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L); + bld.emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0)); + bld.SHL(flags1, src_reg(flags1), src_reg(4)); + bld.OR(header1_w, src_reg(header1_w), src_reg(flags1)); } /* i965 clipping workaround: @@ -2960,35 +431,35 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) if (brw->has_negative_rhw_bug) { src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]); ndc_w.swizzle = BRW_SWIZZLE_WWWW; - emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L)); + bld.CMP(bld.reg_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L); vec4_instruction *inst; - inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6))); + inst = bld.OR(header1_w, src_reg(header1_w), src_reg(1u << 6)); inst->predicate = BRW_PREDICATE_NORMAL; - inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f))); + inst = bld.MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)); inst->predicate = BRW_PREDICATE_NORMAL; } - emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1))); + bld.MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)); } else if (brw->gen < 6) { - 
emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u)); + bld.MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u); } else { - emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0))); + bld.MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { dst_reg reg_w = reg; reg_w.writemask = WRITEMASK_W; - emit(MOV(reg_w, src_reg(output_reg[VARYING_SLOT_PSIZ]))); + bld.MOV(reg_w, src_reg(output_reg[VARYING_SLOT_PSIZ])); } if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) { dst_reg reg_y = reg; reg_y.writemask = WRITEMASK_Y; reg_y.type = BRW_REGISTER_TYPE_D; - emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER]))); + bld.MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])); } if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) { dst_reg reg_z = reg; reg_z.writemask = WRITEMASK_Z; reg_z.type = BRW_REGISTER_TYPE_D; - emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT]))); + bld.MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])); } } } @@ -3016,9 +487,9 @@ vec4_visitor::emit_clip_distances(dst_reg reg, int offset) for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4; ++i) { reg.writemask = 1 << i; - emit(DP4(reg, + bld.DP4(reg, src_reg(output_reg[clip_vertex]), - src_reg(this->userplane[i + offset]))); + src_reg(this->userplane[i + offset])); } } @@ -3027,10 +498,10 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying) { assert (varying < VARYING_SLOT_MAX); reg.type = output_reg[varying].type; - current_annotation = output_reg_annotation[varying]; + bld.set_annotation(output_reg_annotation[varying]); /* Copy the register, saturating if necessary */ - vec4_instruction *inst = emit(MOV(reg, - src_reg(output_reg[varying]))); + vec4_instruction *inst = bld.MOV(reg, + src_reg(output_reg[varying])); if ((varying == VARYING_SLOT_COL0 || varying == VARYING_SLOT_COL1 || varying == VARYING_SLOT_BFC0 || @@ -3049,17 +520,17 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying) case VARYING_SLOT_PSIZ: { /* PSIZ is always in slot 0, and is coupled with other flags. */ - current_annotation = "indices, point width, clip flags"; + bld.set_annotation("indices, point width, clip flags"); emit_psiz_and_flags(reg); break; } case BRW_VARYING_SLOT_NDC: - current_annotation = "NDC"; - emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC]))); + bld.set_annotation("NDC"); + bld.MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])); break; case VARYING_SLOT_POS: - current_annotation = "gl_Position"; - emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS]))); + bld.set_annotation("gl_Position"); + bld.MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])); break; case VARYING_SLOT_EDGE: /* This is present when doing unfilled polygons. We're supposed to copy @@ -3068,9 +539,9 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying) * of that attribute (starts as 1.0f). This is then used in clipping to * determine which edges should be drawn as wireframe. 
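Each DP4 emitted by emit_clip_distances() above computes a signed plane distance; in scalar form:

   /* Signed distance of a homogeneous position from one user clip plane;
    * each DP4 writes this value into a single channel of reg. */
   static float
   clip_distance(const float pos[4], const float plane[4])
   {
      return pos[0] * plane[0] + pos[1] * plane[1] +
             pos[2] * plane[2] + pos[3] * plane[3];
   }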
*/ - current_annotation = "edge flag"; - emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG, - glsl_type::float_type, WRITEMASK_XYZW)))); + bld.set_annotation("edge flag"); + bld.MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG, + glsl_type::float_type, WRITEMASK_XYZW))); break; case BRW_VARYING_SLOT_PAD: /* No need to write to this slot */ @@ -3138,7 +609,7 @@ vec4_visitor::emit_vertex() /* Lower legacy ff and ClipVertex clipping to clip distances */ if (key->userclip_active && !prog->UsesClipDistanceOut) { - current_annotation = "user clip distances"; + bld.set_annotation("user clip distances"); output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type); output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type); @@ -3173,7 +644,7 @@ vec4_visitor::emit_vertex() } complete = slot >= prog_data->vue_map.num_slots; - current_annotation = "URB write"; + bld.set_annotation("URB write"); vec4_instruction *inst = emit_urb_write_opcode(complete); inst->base_mrf = base_mrf; inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); @@ -3198,44 +669,14 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst, message_header_scale *= 16; if (reladdr) { + vec4_builder ibld = bld.at(block, inst); src_reg index = src_reg(this, glsl_type::int_type); - emit_before(block, inst, ADD(dst_reg(index), *reladdr, - src_reg(reg_offset))); - emit_before(block, inst, MUL(dst_reg(index), index, - src_reg(message_header_scale))); - - return index; - } else { - return src_reg(reg_offset * message_header_scale); - } -} - -src_reg -vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst, - src_reg *reladdr, int reg_offset) -{ - if (reladdr) { - src_reg index = src_reg(this, glsl_type::int_type); - - emit_before(block, inst, ADD(dst_reg(index), *reladdr, - src_reg(reg_offset))); - - /* Pre-gen6, the message header uses byte offsets instead of vec4 - * (16-byte) offset units. - */ - if (brw->gen < 6) { - emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16))); - } + ibld.ADD(dst_reg(index), *reladdr, src_reg(reg_offset)); + ibld.MUL(dst_reg(index), index, src_reg(message_header_scale)); return index; - } else if (brw->gen >= 8) { - /* Store the offset in a GRF so we can send-from-GRF. */ - src_reg offset = src_reg(this, glsl_type::int_type); - emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset))); - return offset; } else { - int message_header_scale = brw->gen < 6 ? 
16 : 1; return src_reg(reg_offset * message_header_scale); } } @@ -3251,11 +692,12 @@ vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst, dst_reg temp, src_reg orig_src, int base_offset) { + vec4_builder ibld = bld.at(block, inst); int reg_offset = base_offset + orig_src.reg_offset; src_reg index = get_scratch_offset(block, inst, orig_src.reladdr, reg_offset); - emit_before(block, inst, SCRATCH_READ(temp, index)); + SCRATCH_READ(ibld, temp, index); } /** @@ -3291,13 +733,13 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst, temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], swizzles[2], swizzles[3]); + vec4_builder ibld = bld.at(block, (vec4_instruction *)inst->next); dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), inst->dst.writemask)); - vec4_instruction *write = SCRATCH_WRITE(dst, temp, index); - write->predicate = inst->predicate; - write->ir = inst->ir; - write->annotation = inst->annotation; - inst->insert_after(block, write); + ibld.set_base_ir(inst->ir); + ibld.set_annotation(inst->annotation); + exec_predicate(inst->predicate, + SCRATCH_WRITE(ibld, dst, temp, index)); inst->dst.file = temp.file; inst->dst.reg = temp.reg; @@ -3346,8 +788,8 @@ vec4_visitor::move_grf_array_access_to_scratch() */ foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { /* Set up the annotation tracking for new generated instructions. */ - base_ir = inst->ir; - current_annotation = inst->annotation; + bld.set_base_ir(inst->ir); + bld.set_annotation(inst->annotation); if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]); @@ -3371,37 +813,6 @@ vec4_visitor::move_grf_array_access_to_scratch() } /** - * Emits an instruction before @inst to load the value named by @orig_src - * from the pull constant buffer (surface) at @base_offset to @temp. - */ -void -vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, - dst_reg temp, src_reg orig_src, - int base_offset) -{ - int reg_offset = base_offset + orig_src.reg_offset; - src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start); - src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr, - reg_offset); - vec4_instruction *load; - - if (brw->gen >= 7) { - dst_reg grf_offset = dst_reg(this, glsl_type::int_type); - grf_offset.type = offset.type; - emit_before(block, inst, MOV(grf_offset, offset)); - - load = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, - temp, index, src_reg(grf_offset)); - } else { - load = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD, - temp, index, offset); - load->base_mrf = 14; - load->mlen = 1; - } - emit_before(block, inst, load); -} - -/** * Implements array access of uniforms by inserting a * PULL_CONSTANT_LOAD instruction. * @@ -3430,7 +841,7 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr) continue; - int uniform = inst->src[i].reg; + unsigned uniform = inst->src[i].reg; /* If this array isn't already present in the pull constant buffer, * add it. @@ -3449,13 +860,16 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() } /* Set up the annotation tracking for new generated instructions. 
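The "16 * loc" byte offset handed to the new emit_pull_constant_load() call just below is vec4 addressing; a sketch with a hypothetical helper name:

   /* Pull constants are packed as vec4s, so the remapped location plus the
    * access' reg_offset lands at a 16-byte-aligned offset in the surface. */
   static unsigned
   pull_constant_byte_offset(unsigned pull_constant_loc, unsigned reg_offset)
   {
      return 16 * (pull_constant_loc + reg_offset);
   }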
*/ - base_ir = inst->ir; - current_annotation = inst->annotation; + bld.set_base_ir(inst->ir); + bld.set_annotation(inst->annotation); - dst_reg temp = dst_reg(this, glsl_type::vec4_type); + vec4_builder ibld = bld.at(block, inst); + int loc = pull_constant_loc[uniform] + inst->src[i].reg_offset; + src_reg surf_index(prog_data->base.binding_table.pull_constants_start); + dst_reg temp = bld.natural_reg(BRW_REGISTER_TYPE_F); - emit_pull_constant_load(block, inst, temp, inst->src[i], - pull_constant_loc[uniform]); + emit_pull_constant_load(ibld, temp, surf_index, 16 * loc, + inst->src[i].reladdr, 4); inst->src[i].file = temp.file; inst->src[i].reg = temp.reg; @@ -3472,16 +886,162 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() split_uniform_registers(); } -void -vec4_visitor::resolve_ud_negate(src_reg *reg) +static bool +is_high_sampler(struct brw_context *brw, src_reg sampler) { - if (reg->type != BRW_REGISTER_TYPE_UD || - !reg->negate) - return; + if (brw->gen < 8 && !brw->is_haswell) + return false; - src_reg temp = src_reg(this, glsl_type::uvec4_type); - emit(BRW_OPCODE_MOV, dst_reg(temp), *reg); - *reg = temp; + return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16; +} + +vec4_instruction * +vec4_visitor::emit_texture(ir_texture *ir, const dst_reg &dst, + const src_reg &coordinate, const src_reg &shadow_c, + const src_reg &lod, const src_reg &lod2, + const src_reg &offset_val, const src_reg &sample_index, + const src_reg &mcs, const src_reg &sampler) +{ + const bool has_nonconstant_offset = (offset_val.file != BAD_FILE); + enum opcode opcode; + + switch (ir->op) { + case ir_tex: opcode = SHADER_OPCODE_TXL; break; + case ir_txl: opcode = SHADER_OPCODE_TXL; break; + case ir_txd: opcode = SHADER_OPCODE_TXD; break; + case ir_txf: opcode = SHADER_OPCODE_TXF; break; + case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break; + case ir_txs: opcode = SHADER_OPCODE_TXS; break; + case ir_tg4: opcode = has_nonconstant_offset + ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break; + case ir_query_levels: opcode = SHADER_OPCODE_TXS; break; + case ir_txb: + unreachable("TXB is not valid for vertex shaders."); + case ir_lod: + unreachable("LOD is not valid for vertex shaders."); + default: + unreachable("Unrecognized tex op"); + } + + vec4_instruction inst(opcode, dst, src_reg(), sampler); + + /* The message header is necessary for: + * - Gen4 (always) + * - Texel offsets + * - Gather channel selection + * - Sampler indices too large to fit in a 4-bit value. + */ + inst.header_present = + brw->gen < 5 || inst.texture_offset != 0 || ir->op == ir_tg4 || + is_high_sampler(brw, sampler); + inst.base_mrf = 2; + inst.mlen = inst.header_present + 1; /* always at least one */ + + /* MRF for the first parameter */ + dst_reg payload = dst_reg(MRF, inst.base_mrf + inst.header_present); + + if (ir->op == ir_txs || ir->op == ir_query_levels) { + const unsigned mask = brw->gen == 4 ? 
WRITEMASK_W : WRITEMASK_X; + bld.MOV(writemask(retype(payload, lod.type), mask), lod); + } else { + /* Load the coordinate */ + /* FINISHME: gl_clamp_mask and saturate */ + int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1; + int zero_mask = 0xf & ~coord_mask; + + bld.MOV(writemask(retype(payload, coordinate.type), coord_mask), + coordinate); + + if (zero_mask != 0) + bld.MOV(writemask(retype(payload, coordinate.type), zero_mask), + src_reg(0)); + + /* Load the shadow comparitor */ + if (ir->shadow_comparitor && ir->op != ir_txd && (ir->op != ir_tg4 || !has_nonconstant_offset)) { + bld.MOV(writemask(offset(retype(payload, shadow_c.type), 1), + WRITEMASK_X), + shadow_c); + inst.mlen++; + } + + /* Load the LOD info */ + if (ir->op == ir_tex || ir->op == ir_txl) { + dst_reg mrf; + unsigned mask; + if (brw->gen >= 5) { + mrf = offset(payload, 1); + if (ir->shadow_comparitor) { + mask = WRITEMASK_Y; + /* mlen already incremented */ + } else { + mask = WRITEMASK_X; + inst.mlen++; + } + } else /* brw->gen == 4 */ { + mrf = payload; + mask = WRITEMASK_W; + } + bld.MOV(writemask(retype(mrf, lod.type), mask), lod); + } else if (ir->op == ir_txf) { + bld.MOV(writemask(retype(payload, lod.type), WRITEMASK_W), + lod); + } else if (ir->op == ir_txf_ms) { + bld.MOV(writemask(retype(offset(payload, 1), sample_index.type), + WRITEMASK_X), sample_index); + if (brw->gen >= 7) { + /* MCS data is in the first channel of `mcs`, but we need to get it into + * the .y channel of the second vec4 of params, so replicate .x across + * the whole vec4 and then mask off everything except .y + */ + bld.MOV(writemask(retype(offset(payload, 1), BRW_REGISTER_TYPE_UD), + WRITEMASK_Y), + swizzle(mcs, BRW_SWIZZLE_XXXX)); + } + inst.mlen++; + } else if (ir->op == ir_txd) { + dst_reg mrf = retype(payload, lod.type); + + if (brw->gen >= 5) { + bld.MOV(writemask(offset(mrf, 1), WRITEMASK_XZ), + swizzle(lod, BRW_SWIZZLE_XXYY)); + bld.MOV(writemask(offset(mrf, 1), WRITEMASK_YW), + swizzle(lod2, BRW_SWIZZLE_XXYY)); + inst.mlen++; + + if (ir->type->vector_elements == 3 || ir->shadow_comparitor) { + bld.MOV(writemask(offset(mrf, 2), WRITEMASK_X), + swizzle(lod, BRW_SWIZZLE_ZZZZ)); + bld.MOV(writemask(offset(mrf, 2), WRITEMASK_Y), + swizzle(lod2, BRW_SWIZZLE_ZZZZ)); + inst.mlen++; + + if (ir->shadow_comparitor) + bld.MOV(writemask(offset(retype(payload, shadow_c.type), 2), + WRITEMASK_Z), shadow_c); + } + } else /* brw->gen == 4 */ { + bld.MOV(writemask(offset(mrf, 1), WRITEMASK_XYZ), lod); + bld.MOV(writemask(offset(mrf, 2), WRITEMASK_XYZ), lod2); + inst.mlen += 2; + } + } else if (ir->op == ir_tg4 && has_nonconstant_offset) { + if (ir->shadow_comparitor) + bld.MOV(writemask(retype(payload, shadow_c.type), + WRITEMASK_W), shadow_c); + + bld.MOV(writemask(retype(offset(payload, 1), BRW_REGISTER_TYPE_D), + WRITEMASK_XY), offset_val); + inst.mlen++; + } + } + + return bld.emit(inst); +} + +src_reg +vec4_visitor::emit_untyped_surface_header() +{ + return src_reg(); } vec4_visitor::vec4_visitor(struct brw_context *brw, @@ -3494,81 +1054,26 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, void *mem_ctx, bool debug_flag, bool no_spills, - shader_time_shader_type st_base, - shader_time_shader_type st_written, - shader_time_shader_type st_reset) - : backend_visitor(brw, shader_prog, prog, &prog_data->base, stage), + shader_time_shader_type st_type) + : backend_visitor(brw, shader_prog, prog, &prog_data->base, mem_ctx, stage, + debug_flag, false, + vec4_builder(brw, mem_ctx, alloc, instructions), + st_type, + /* Initialize 
uniform_array_size to at least 1 because + * pre-gen6 VS requires at least one. See + * setup_uniforms() in brw_vec4.cpp. + */ + MAX2(prog_data->base.nr_params, 1)), c(c), key(key), prog_data(prog_data), sanity_param_count(0), - fail_msg(NULL), - first_non_payload_grf(0), need_all_constants_in_pull_buffer(false), debug_flag(debug_flag), - no_spills(no_spills), - st_base(st_base), - st_written(st_written), - st_reset(st_reset) + no_spills(no_spills) { - this->mem_ctx = mem_ctx; - this->failed = false; - - this->base_ir = NULL; - this->current_annotation = NULL; memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation)); - - this->variable_ht = hash_table_ctor(0, - hash_table_pointer_hash, - hash_table_pointer_compare); - - this->virtual_grf_start = NULL; - this->virtual_grf_end = NULL; this->live_intervals_valid = false; - - this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; - - this->uniforms = 0; - - /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires - * at least one. See setup_uniforms() in brw_vec4.cpp. - */ - this->uniform_array_size = 1; - if (prog_data) { - this->uniform_array_size = MAX2(stage_prog_data->nr_params, 1); - } - - this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); - this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); -} - -vec4_visitor::~vec4_visitor() -{ - hash_table_dtor(this->variable_ht); -} - - -void -vec4_visitor::fail(const char *format, ...) -{ - va_list va; - char *msg; - - if (failed) - return; - - failed = true; - - va_start(va, format); - msg = ralloc_vasprintf(mem_ctx, format, va); - va_end(va); - msg = ralloc_asprintf(mem_ctx, "vec4 compile failed: %s\n", msg); - - this->fail_msg = msg; - - if (debug_flag) { - fprintf(stderr, "%s", msg); - } } } /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp index 5d9027b2ea6..304d3f0015c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp @@ -43,10 +43,10 @@ vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod, { vec4_instruction *inst; - inst = emit(BRW_OPCODE_CMP, dst_null_d(), src0, src1); + inst = bld.emit(BRW_OPCODE_CMP, bld.reg_null_d(), src0, src1); inst->conditional_mod = conditional_mod; - inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f)); + inst = bld.emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f)); inst->predicate = BRW_PREDICATE_NORMAL; } @@ -68,11 +68,11 @@ vec4_vs_visitor::emit_program_code() * mov.f0 dst 1.0 */ src_reg one = src_reg(this, glsl_type::float_type); - emit(MOV(dst_reg(one), src_reg(1.0f))); + bld.MOV(dst_reg(one), src_reg(1.0f)); for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) { const struct prog_instruction *vpi = &prog->Instructions[insn]; - base_ir = vpi; + bld.set_base_ir(vpi); dst_reg dst; src_reg src[3]; @@ -89,11 +89,11 @@ vec4_vs_visitor::emit_program_code() case OPCODE_ABS: src[0].abs = true; src[0].negate = false; - emit(MOV(dst, src[0])); + bld.MOV(dst, src[0]); break; case OPCODE_ADD: - emit(ADD(dst, src[0], src[1])); + bld.ADD(dst, src[0], src[1]); break; case OPCODE_ARL: @@ -102,40 +102,40 @@ vec4_vs_visitor::emit_program_code() dst_reg dst_f = dst; dst_f.type = BRW_REGISTER_TYPE_F; - emit(RNDD(dst_f, src[0])); - emit(MOV(dst, src_reg(dst_f))); + bld.RNDD(dst_f, src[0]); + bld.MOV(dst, src_reg(dst_f)); } else { - emit(RNDD(dst, src[0])); + bld.RNDD(dst, src[0]); } break; case OPCODE_DP3: - emit(DP3(dst, src[0], 
src[1])); + bld.DP3(dst, src[0], src[1]); break; case OPCODE_DP4: - emit(DP4(dst, src[0], src[1])); + bld.DP4(dst, src[0], src[1]); break; case OPCODE_DPH: - emit(DPH(dst, src[0], src[1])); + bld.DPH(dst, src[0], src[1]); break; case OPCODE_DST: { dst_reg t = dst; if (vpi->DstReg.WriteMask & WRITEMASK_X) { t.writemask = WRITEMASK_X; - emit(MOV(t, src_reg(1.0f))); + bld.MOV(t, src_reg(1.0f)); } if (vpi->DstReg.WriteMask & WRITEMASK_Y) { t.writemask = WRITEMASK_Y; - emit(MUL(t, src[0], src[1])); + bld.MUL(t, src[0], src[1]); } if (vpi->DstReg.WriteMask & WRITEMASK_Z) { t.writemask = WRITEMASK_Z; - emit(MOV(t, src[0])); + bld.MOV(t, src[0]); } if (vpi->DstReg.WriteMask & WRITEMASK_W) { t.writemask = WRITEMASK_W; - emit(MOV(t, src[1])); + bld.MOV(t, src[1]); } break; } @@ -146,46 +146,46 @@ vec4_vs_visitor::emit_program_code() /* tmp_d = floor(src[0].x) */ src_reg tmp_d = src_reg(this, glsl_type::ivec4_type); assert(tmp_d.type == BRW_REGISTER_TYPE_D); - emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX))); + bld.RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)); /* result[0] = 2.0 ^ tmp */ /* Adjust exponent for floating point: exp += 127 */ dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X); - emit(ADD(tmp_d_x, tmp_d, src_reg(127))); + bld.ADD(tmp_d_x, tmp_d, src_reg(127)); /* Install exponent and sign. Excess drops off the edge: */ dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X); - emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23)); + bld.emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23)); } if (vpi->DstReg.WriteMask & WRITEMASK_Y) { result.writemask = WRITEMASK_Y; - emit(FRC(result, src[0])); + bld.FRC(result, src[0]); } if (vpi->DstReg.WriteMask & WRITEMASK_Z) { result.writemask = WRITEMASK_Z; - emit_math(SHADER_OPCODE_EXP2, result, src[0]); + bld.emit_math(SHADER_OPCODE_EXP2, result, src[0]); } if (vpi->DstReg.WriteMask & WRITEMASK_W) { result.writemask = WRITEMASK_W; - emit(MOV(result, src_reg(1.0f))); + bld.MOV(result, src_reg(1.0f)); } break; } case OPCODE_EX2: - emit_math(SHADER_OPCODE_EXP2, dst, src[0]); + bld.emit_math(SHADER_OPCODE_EXP2, dst, src[0]); break; case OPCODE_FLR: - emit(RNDD(dst, src[0])); + bld.RNDD(dst, src[0]); break; case OPCODE_FRC: - emit(FRC(dst, src[0])); + bld.FRC(dst, src[0]); break; case OPCODE_LG2: - emit_math(SHADER_OPCODE_LOG2, dst, src[0]); + bld.emit_math(SHADER_OPCODE_LOG2, dst, src[0]); break; case OPCODE_LIT: { @@ -207,36 +207,36 @@ vec4_vs_visitor::emit_program_code() */ if (vpi->DstReg.WriteMask & WRITEMASK_XW) { result.writemask = WRITEMASK_XW; - emit(MOV(result, src_reg(1.0f))); + bld.MOV(result, src_reg(1.0f)); } if (vpi->DstReg.WriteMask & WRITEMASK_YZ) { result.writemask = WRITEMASK_YZ; - emit(MOV(result, src_reg(0.0f))); + bld.MOV(result, src_reg(0.0f)); src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX); - emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G)); - emit(IF(BRW_PREDICATE_NORMAL)); + bld.CMP(bld.reg_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G); + bld.IF(BRW_PREDICATE_NORMAL); if (vpi->DstReg.WriteMask & WRITEMASK_Y) { result.writemask = WRITEMASK_Y; - emit(MOV(result, tmp_x)); + bld.MOV(result, tmp_x); } if (vpi->DstReg.WriteMask & WRITEMASK_Z) { /* if (tmp.y < 0) tmp.y = 0; */ src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY); result.writemask = WRITEMASK_Z; - emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f)); + bld.emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f)); src_reg clamped_y(result); clamped_y.swizzle = BRW_SWIZZLE_ZZZZ; src_reg tmp_w = 
swizzle(src[0], BRW_SWIZZLE_WWWW); - emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w); + bld.emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w); } - emit(BRW_OPCODE_ENDIF); + bld.emit(BRW_OPCODE_ENDIF); } break; } @@ -260,19 +260,19 @@ vec4_vs_visitor::emit_program_code() */ if (vpi->DstReg.WriteMask & WRITEMASK_XZ) { result.writemask = WRITEMASK_X; - emit(AND(result, arg0_ud, src_reg((1u << 31) - 1))); - emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u)); + bld.AND(result, arg0_ud, src_reg((1u << 31) - 1)); + bld.emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u)); src_reg result_d(result_src); result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */ result.type = BRW_REGISTER_TYPE_F; - emit(ADD(result, result_d, src_reg(-127))); + bld.ADD(result, result_d, src_reg(-127)); } if (vpi->DstReg.WriteMask & WRITEMASK_YZ) { result.writemask = WRITEMASK_Y; result.type = BRW_REGISTER_TYPE_UD; - emit(AND(result, arg0_ud, src_reg((1u << 23) - 1))); - emit(OR(result, result_src, src_reg(127u << 23))); + bld.AND(result, arg0_ud, src_reg((1u << 23) - 1)); + bld.OR(result, result_src, src_reg(127u << 23)); } if (vpi->DstReg.WriteMask & WRITEMASK_Z) { @@ -294,51 +294,51 @@ vec4_vs_visitor::emit_program_code() result_x.swizzle = BRW_SWIZZLE_XXXX; result_y.swizzle = BRW_SWIZZLE_YYYY; result_z.swizzle = BRW_SWIZZLE_ZZZZ; - emit_math(SHADER_OPCODE_LOG2, result, result_y); - emit(ADD(result, result_z, result_x)); + bld.emit_math(SHADER_OPCODE_LOG2, result, result_y); + bld.ADD(result, result_z, result_x); } if (vpi->DstReg.WriteMask & WRITEMASK_W) { result.type = BRW_REGISTER_TYPE_F; result.writemask = WRITEMASK_W; - emit(MOV(result, src_reg(1.0f))); + bld.MOV(result, src_reg(1.0f)); } break; } case OPCODE_MAD: { src_reg temp = src_reg(this, glsl_type::vec4_type); - emit(MUL(dst_reg(temp), src[0], src[1])); - emit(ADD(dst, temp, src[2])); + bld.MUL(dst_reg(temp), src[0], src[1]); + bld.ADD(dst, temp, src[2]); break; } case OPCODE_MAX: - emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]); + bld.emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]); break; case OPCODE_MIN: - emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]); + bld.emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]); break; case OPCODE_MOV: - emit(MOV(dst, src[0])); + bld.MOV(dst, src[0]); break; case OPCODE_MUL: - emit(MUL(dst, src[0], src[1])); + bld.MUL(dst, src[0], src[1]); break; case OPCODE_POW: - emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]); + bld.emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]); break; case OPCODE_RCP: - emit_math(SHADER_OPCODE_RCP, dst, src[0]); + bld.emit_math(SHADER_OPCODE_RCP, dst, src[0]); break; case OPCODE_RSQ: - emit_math(SHADER_OPCODE_RSQ, dst, src[0]); + bld.emit_math(SHADER_OPCODE_RSQ, dst, src[0]); break; case OPCODE_SGE: @@ -352,7 +352,7 @@ vec4_vs_visitor::emit_program_code() case OPCODE_SUB: { src_reg neg_src1 = src[1]; neg_src1.negate = !src[1].negate; - emit(ADD(dst, src[0], neg_src1)); + bld.ADD(dst, src[0], neg_src1); break; } @@ -360,21 +360,21 @@ vec4_vs_visitor::emit_program_code() /* Note that SWZ's extended swizzles are handled in the general * get_src_reg() code. 
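The bit manipulation in the OPCODE_LOG case above (mask off the sign, shift out the mantissa, remove the bias, then force the exponent field to 127) can be checked on the CPU:

   #include <cassert>
   #include <cstdint>
   #include <cstring>

   int main()
   {
      const float x = 12.0f; /* 1.5 * 2^3 */
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));

      /* Exponent: clear the sign, shift out the mantissa, remove the bias. */
      const int exponent = (int)((bits & 0x7fffffffu) >> 23) - 127;

      /* Mantissa: keep the fraction, force the exponent field to 127,
       * which rescales the value into [1, 2). */
      uint32_t m_bits = (bits & ((1u << 23) - 1)) | (127u << 23);
      float mantissa;
      std::memcpy(&mantissa, &m_bits, sizeof(mantissa));

      assert(exponent == 3 && mantissa == 1.5f);
   }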
*/ - emit(MOV(dst, src[0])); + bld.MOV(dst, src[0]); break; case OPCODE_XPD: { src_reg t1 = src_reg(this, glsl_type::vec4_type); src_reg t2 = src_reg(this, glsl_type::vec4_type); - emit(MUL(dst_reg(t1), + bld.MUL(dst_reg(t1), swizzle(src[0], BRW_SWIZZLE_YZXW), - swizzle(src[1], BRW_SWIZZLE_ZXYW))); - emit(MUL(dst_reg(t2), + swizzle(src[1], BRW_SWIZZLE_ZXYW)); + bld.MUL(dst_reg(t2), swizzle(src[0], BRW_SWIZZLE_ZXYW), - swizzle(src[1], BRW_SWIZZLE_YZXW))); + swizzle(src[1], BRW_SWIZZLE_YZXW)); t2.negate = true; - emit(ADD(dst, t1, t2)); + bld.ADD(dst, t1, t2); break; } @@ -388,7 +388,7 @@ vec4_vs_visitor::emit_program_code() /* Copy the temporary back into the actual destination register. */ if (vpi->Opcode != OPCODE_END) { - emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst))); + bld.MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)); } } @@ -475,7 +475,7 @@ vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst) } case PROGRAM_UNDEFINED: - return dst_null_f(); + return bld.reg_null_f(); default: unreachable("vec4_vp: bad destination register file"); @@ -530,10 +530,10 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) src_reg reladdr = src_reg(this, glsl_type::int_type); dst_reg dst_reladdr = dst_reg(reladdr); dst_reladdr.writemask = WRITEMASK_X; - emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index))); + bld.ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)); if (brw->gen < 6) - emit(MUL(dst_reladdr, reladdr, src_reg(16))); + bld.MUL(dst_reladdr, reladdr, src_reg(16)); #if 0 assert(src.Index < this->uniforms); @@ -547,17 +547,14 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start)); vec4_instruction *load; if (brw->gen >= 7) { - load = new(mem_ctx) - vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, - dst_reg(result), surf_index, reladdr); + load = bld.emit(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, + dst_reg(result), surf_index, reladdr); } else { - load = new(mem_ctx) - vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD, - dst_reg(result), surf_index, reladdr); + load = bld.emit(VS_OPCODE_PULL_CONSTANT_LOAD, + dst_reg(result), surf_index, reladdr); load->base_mrf = 14; load->mlen = 1; } - emit(load); break; } @@ -571,7 +568,7 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) for (int i = 0; i < 4; i++) { dst_reg t = dst_reg(result); t.writemask = 1 << i; - emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f))); + bld.MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)); } break; @@ -636,24 +633,24 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) if (src_mask) { temp.writemask = src_mask; - emit(MOV(temp, result)); + bld.MOV(temp, result); } if (zeros_mask) { temp.writemask = zeros_mask; - emit(MOV(temp, src_reg(0.0f))); + bld.MOV(temp, src_reg(0.0f)); } if (ones_mask) { temp.writemask = ones_mask; - emit(MOV(temp, src_reg(1.0f))); + bld.MOV(temp, src_reg(1.0f)); } if (src.Negate) { temp.writemask = src.Negate; src_reg neg(temp_src); neg.negate = true; - emit(MOV(temp, neg)); + bld.MOV(temp, neg); } result = temp_src; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 72b6ef03b42..ac544354d6a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -51,7 +51,7 @@ vec4_vs_visitor::emit_prolog() dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); dst.writemask = (1 << 
(wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1; - emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f))); + bld.MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)); } /* Do sign recovery for 2101010 formats if required. */ @@ -59,19 +59,19 @@ vec4_vs_visitor::emit_prolog() if (sign_recovery_shift.file == BAD_FILE) { /* shift constant: <22,22,22,30> */ sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type); - emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u))); - emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u))); + bld.MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)); + bld.MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)); } - emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift))); - emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift))); + bld.SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)); + bld.ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)); } /* Apply BGRA swizzle if required. */ if (wa_flags & BRW_ATTRIB_WA_BGRA) { src_reg temp = src_reg(reg); temp.swizzle = BRW_SWIZZLE4(2,1,0,3); - emit(MOV(reg, temp)); + bld.MOV(reg, temp); } if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) { @@ -87,17 +87,17 @@ vec4_vs_visitor::emit_prolog() if (es3_normalize_factor.file == BAD_FILE) { /* mul constant: 1 / (2^(b-1) - 1) */ es3_normalize_factor = dst_reg(this, glsl_type::vec4_type); - emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ), - src_reg(1.0f / ((1<<9) - 1)))); - emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W), - src_reg(1.0f / ((1<<1) - 1)))); + bld.MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ), + src_reg(1.0f / ((1<<9) - 1))); + bld.MOV(writemask(es3_normalize_factor, WRITEMASK_W), + src_reg(1.0f / ((1<<1) - 1))); } dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); - emit(MOV(dst, src_reg(reg_d))); - emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor))); - emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f)); + bld.MOV(dst, src_reg(reg_d)); + bld.MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)); + bld.emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f)); } else { /* The following equations are from the OpenGL 3.2 specification: * @@ -113,30 +113,30 @@ vec4_vs_visitor::emit_prolog() if (normalize_factor.file == BAD_FILE) { /* 1 / (2^b - 1) for b=<10,10,10,2> */ normalize_factor = dst_reg(this, glsl_type::vec4_type); - emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ), - src_reg(1.0f / ((1<<10) - 1)))); - emit(MOV(writemask(normalize_factor, WRITEMASK_W), - src_reg(1.0f / ((1<<2) - 1)))); + bld.MOV(writemask(normalize_factor, WRITEMASK_XYZ), + src_reg(1.0f / ((1<<10) - 1))); + bld.MOV(writemask(normalize_factor, WRITEMASK_W), + src_reg(1.0f / ((1<<2) - 1))); } dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); - emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud))); + bld.MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)); /* For signed normalization, we want the numerator to be 2c+1. 
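The signed-normalization formula quoted above, evaluated for one hypothetical 10-bit component; the final clamp at -1.0 is what the nearby emit_minmax() provides:

   #include <cassert>

   int main()
   {
      const int b = 10, c = 300; /* one signed 10-bit component */
      const float f = (2.0f * c + 1.0f) / ((1 << b) - 1);
      assert(f > 0.587f && f < 0.588f); /* 601 / 1023 ~= 0.5875 */
   }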
*/ if (wa_flags & BRW_ATTRIB_WA_SIGN) { - emit(MUL(dst, src_reg(dst), src_reg(2.0f))); - emit(ADD(dst, src_reg(dst), src_reg(1.0f))); + bld.MUL(dst, src_reg(dst), src_reg(2.0f)); + bld.ADD(dst, src_reg(dst), src_reg(1.0f)); } - emit(MUL(dst, src_reg(dst), src_reg(normalize_factor))); + bld.MUL(dst, src_reg(dst), src_reg(normalize_factor)); } } if (wa_flags & BRW_ATTRIB_WA_SCALE) { dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); - emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud))); + bld.MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)); } } } @@ -193,7 +193,7 @@ vec4_vs_visitor::emit_urb_write_opcode(bool complete) emit_shader_time_end(); } - vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); + vec4_instruction *inst = bld.emit(VS_OPCODE_URB_WRITE); inst->urb_write_flags = complete ? BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS; @@ -221,7 +221,7 @@ vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw, &vs_compile->key.base, &vs_prog_data->base, prog, MESA_SHADER_VERTEX, mem_ctx, INTEL_DEBUG & DEBUG_VS, false /* no_spills */, - ST_VS, ST_VS_WRITTEN, ST_VS_RESET), + ST_VS), vs_compile(vs_compile), vs_prog_data(vs_prog_data) { diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index d16cc6ed8b7..f06da953bcf 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -70,20 +70,20 @@ gen6_gs_visitor::emit_prolog() * flags for the next vertex come right after the data items and flags for * the previous vertex. */ - this->current_annotation = "gen6 prolog"; + bld.set_annotation("gen6 prolog"); this->vertex_output = src_reg(this, glsl_type::uint_type, (prog_data->vue_map.num_slots + 1) * c->gp->program.VerticesOut); this->vertex_output_offset = src_reg(this, glsl_type::uint_type); - emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u))); + bld.MOV(dst_reg(this->vertex_output_offset), src_reg(0u)); /* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES), * so initialize it once to R0. */ - vec4_instruction *inst = emit(MOV(dst_reg(MRF, 1), + vec4_instruction *inst = bld.MOV(dst_reg(MRF, 1), retype(brw_vec8_grf(0, 0), - BRW_REGISTER_TYPE_UD))); + BRW_REGISTER_TYPE_UD)); inst->force_writemask_all = true; /* This will be used as a temporary to store writeback data of FF_SYNC @@ -98,13 +98,13 @@ gen6_gs_visitor::emit_prolog() * headers. */ this->first_vertex = src_reg(this, glsl_type::uint_type); - emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START)); + bld.MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START); /* The FF_SYNC message requires to know the number of primitives generated, * so keep a counter for this. 
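The vertex_output layout set up in emit_prolog() above implies a simple indexing rule, which also explains the ADD of -1 in the EndPrimitive handling further down (vertex_output_offset already points one past the previous vertex's flags); flags_offset is a hypothetical helper:

   /* Each vertex occupies num_slots data entries plus one flags dword,
    * so the buffer holds (num_slots + 1) * VerticesOut entries in all. */
   static unsigned
   flags_offset(unsigned num_slots, unsigned vertex)
   {
      return vertex * (num_slots + 1) + num_slots;
   }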
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index d16cc6ed8b7..f06da953bcf 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -70,20 +70,20 @@ gen6_gs_visitor::emit_prolog()
    * flags for the next vertex come right after the data items and flags for
    * the previous vertex.
    */
-   this->current_annotation = "gen6 prolog";
+   bld.set_annotation("gen6 prolog");
   this->vertex_output = src_reg(this,
                                 glsl_type::uint_type,
                                 (prog_data->vue_map.num_slots + 1) *
                                 c->gp->program.VerticesOut);
   this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
+   bld.MOV(dst_reg(this->vertex_output_offset), src_reg(0u));
 
   /* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES),
    * so initialize it once to R0.
    */
-   vec4_instruction *inst = emit(MOV(dst_reg(MRF, 1),
+   vec4_instruction *inst = bld.MOV(dst_reg(MRF, 1),
                                     retype(brw_vec8_grf(0, 0),
-                                            BRW_REGISTER_TYPE_UD)));
+                                            BRW_REGISTER_TYPE_UD));
   inst->force_writemask_all = true;
 
   /* This will be used as a temporary to store writeback data of FF_SYNC
@@ -98,13 +98,13 @@ gen6_gs_visitor::emit_prolog()
    * headers.
    */
   this->first_vertex = src_reg(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
+   bld.MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START);
 
   /* The FF_SYNC message requires to know the number of primitives generated,
    * so keep a counter for this.
    */
   this->prim_count = src_reg(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(this->prim_count), 0u));
+   bld.MOV(dst_reg(this->prim_count), 0u);
 
   if (c->prog_data.gen6_xfb_enabled) {
      /* Create a virtual register to hold destination indices in SOL */
@@ -115,8 +115,8 @@ gen6_gs_visitor::emit_prolog()
      this->svbi = src_reg(this, glsl_type::uvec4_type);
      /* Create a virtual register to hold max values of SVBI */
      this->max_svbi = src_reg(this, glsl_type::uvec4_type);
-      emit(MOV(dst_reg(this->max_svbi),
-               src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD))));
+      bld.MOV(dst_reg(this->max_svbi),
+              src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD)));
 
      xfb_setup();
   }
@@ -142,21 +142,21 @@ gen6_gs_visitor::emit_prolog()
   if (c->prog_data.include_primitive_id) {
      this->primitive_id =
         src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
-      emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
+      bld.emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
   }
}
 
 void
 gen6_gs_visitor::visit(ir_emit_vertex *)
 {
-   this->current_annotation = "gen6 emit vertex";
+   bld.set_annotation("gen6 emit vertex");
   /* Honor max_vertex layout indication in geometry shader by ignoring any
    * vertices coming after c->gp->program.VerticesOut.
    */
   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
-            BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
+   bld.CMP(bld.reg_null_d(), this->vertex_count, src_reg(num_output_vertices),
+           BRW_CONDITIONAL_L);
+   bld.IF(BRW_PREDICATE_NORMAL);
   {
      /* Buffer all output slots for this vertex in vertex_output */
      for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
@@ -183,12 +183,12 @@ gen6_gs_visitor::visit(ir_emit_vertex *)
         dst_reg dst(this->vertex_output);
         dst.reladdr = ralloc(mem_ctx, src_reg);
         memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-         vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
+         vec4_instruction *inst = bld.MOV(dst, src_reg(tmp));
         inst->force_writemask_all = true;
      }
 
-      emit(ADD(dst_reg(this->vertex_output_offset),
-               this->vertex_output_offset, 1u));
+      bld.ADD(dst_reg(this->vertex_output_offset),
+              this->vertex_output_offset, 1u);
      }
 
      /* Now buffer flags for this vertex */
@@ -199,32 +199,32 @@ gen6_gs_visitor::visit(ir_emit_vertex *)
        /* If we are outputting points, then every vertex has PrimStart and
         * PrimEnd set.
         */
-        emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
-                 URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
-        emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+        bld.MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
+                URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
+        bld.ADD(dst_reg(this->prim_count), this->prim_count, 1u);
      } else {
        /* Otherwise, we can only set the PrimStart flag, which we have stored
         * in the first_vertex register. We will have to wait until we execute
         * EndPrimitive() or we end the thread to set the PrimEnd flag on a
         * vertex.
         */
-        emit(OR(dst, this->first_vertex,
-                (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
-        emit(MOV(dst_reg(this->first_vertex), 0u));
+        bld.OR(dst, this->first_vertex,
+               (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT));
+        bld.MOV(dst_reg(this->first_vertex), 0u);
      }
-     emit(ADD(dst_reg(this->vertex_output_offset),
-              this->vertex_output_offset, 1u));
+     bld.ADD(dst_reg(this->vertex_output_offset),
+             this->vertex_output_offset, 1u);
 
      /* Update vertex count */
-     emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
+     bld.ADD(dst_reg(this->vertex_count), this->vertex_count, 1u);
   }
-   emit(BRW_OPCODE_ENDIF);
+   bld.emit(BRW_OPCODE_ENDIF);
 }
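The buffering code in visit(ir_emit_vertex *) above, and the EndPrimitive() handler that follows, repeat one idiom: a run-time offset register is copied into reladdr so the instruction addresses vertex_output like an array. A condensed sketch of that idiom; the helper name is hypothetical, not part of the patch:

   /* Hypothetical helper isolating the reladdr idiom used above: store
    * "value" at vertex_output[offset], where "offset" is a register whose
    * value is only known when the shader runs.
    */
   void
   gen6_gs_visitor::store_vertex_output(const src_reg &value,
                                        const src_reg &offset)
   {
      dst_reg dst(this->vertex_output);
      dst.reladdr = ralloc(mem_ctx, src_reg);   /* attach the index register */
      memcpy(dst.reladdr, &offset, sizeof(src_reg));
      vec4_instruction *inst = bld.MOV(dst, value);
      inst->force_writemask_all = true;         /* write all channels */
   }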
 
 void
 gen6_gs_visitor::visit(ir_end_primitive *)
 {
-   this->current_annotation = "gen6 end primitive";
+   bld.set_annotation("gen6 end primitive");
   /* Calling EndPrimitive() is optional for point output. In this case we set
    * the PrimEnd flag when we process EmitVertex().
    */
@@ -241,40 +241,40 @@ gen6_gs_visitor::visit(ir_end_primitive *)
    * below).
    */
   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
-            BRW_CONDITIONAL_L));
-   vec4_instruction *inst = emit(CMP(dst_null_d(),
+   bld.CMP(bld.reg_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
+           BRW_CONDITIONAL_L);
+   vec4_instruction *inst = bld.CMP(bld.reg_null_d(),
                                     this->vertex_count, 0u,
-                                     BRW_CONDITIONAL_NEQ));
+                                     BRW_CONDITIONAL_NEQ);
   inst->predicate = BRW_PREDICATE_NORMAL;
-   emit(IF(BRW_PREDICATE_NORMAL));
+   bld.IF(BRW_PREDICATE_NORMAL);
   {
      /* vertex_output_offset is already pointing at the first entry of the
       * next vertex. So subtract 1 to modify the flags for the previous
       * vertex.
       */
      src_reg offset(this, glsl_type::uint_type);
-      emit(ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1)));
+      bld.ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1));
 
      src_reg dst(this->vertex_output);
      dst.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(dst.reladdr, &offset, sizeof(src_reg));
 
-      emit(OR(dst_reg(dst), dst, URB_WRITE_PRIM_END));
-      emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+      bld.OR(dst_reg(dst), dst, URB_WRITE_PRIM_END);
+      bld.ADD(dst_reg(this->prim_count), this->prim_count, 1u);
 
      /* Set the first vertex flag to indicate that the next vertex will start
       * a primitive.
       */
-      emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
+      bld.MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START);
   }
-   emit(BRW_OPCODE_ENDIF);
+   bld.emit(BRW_OPCODE_ENDIF);
 }
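Every guarded region in this file is built from the same pair of instructions: a CMP against the null register (only the flag result is kept) followed by an IF predicated on it, closed later by ENDIF. Condensed into a sketch; emit_guard is a hypothetical name, not a function in the patch:

   /* Hypothetical condensation of the recurring CMP/IF pattern.  The CMP
    * writes the null register because only the conditional flags matter;
    * the IF then opens a block executed under BRW_PREDICATE_NORMAL.
    * Every call must be paired with a later bld.emit(BRW_OPCODE_ENDIF).
    */
   void
   gen6_gs_visitor::emit_guard(const src_reg &a, const src_reg &b,
                               enum brw_conditional_mod cond)
   {
      bld.CMP(bld.reg_null_d(), a, b, cond);
      bld.IF(BRW_PREDICATE_NORMAL);
   }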
 
 void
 gen6_gs_visitor::emit_urb_write_header(int mrf)
 {
-   this->current_annotation = "gen6 urb header";
+   bld.set_annotation("gen6 urb header");
   /* Compute offset of the flags for the current vertex in vertex_output and
    * write them in dw2 of the message header.
    *
@@ -284,14 +284,14 @@ gen6_gs_visitor::emit_urb_write_header(int mrf)
    * slots per vertex to that offset to obtain the flags data offset.
    */
   src_reg flags_offset(this, glsl_type::uint_type);
-   emit(ADD(dst_reg(flags_offset),
-            this->vertex_output_offset, src_reg(prog_data->vue_map.num_slots)));
+   bld.ADD(dst_reg(flags_offset),
+           this->vertex_output_offset, src_reg(prog_data->vue_map.num_slots));
 
   src_reg flags_data(this->vertex_output);
   flags_data.reladdr = ralloc(mem_ctx, src_reg);
   memcpy(flags_data.reladdr, &flags_offset, sizeof(src_reg));
 
-   emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data);
+   bld.emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data);
 }
 
 void
@@ -302,7 +302,7 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
 
   if (!complete) {
      /* If the vertex is not complete we don't have to do anything special */
-      inst = emit(GS_OPCODE_URB_WRITE);
+      inst = bld.emit(GS_OPCODE_URB_WRITE);
      inst->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
   } else {
      /* Otherwise we always request to allocate a new VUE handle. If this is
@@ -313,7 +313,7 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
       * which would require to end the program with an IF/ELSE/ENDIF block,
       * something we do not want.
       */
-      inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE);
+      inst = bld.emit(GS_OPCODE_URB_WRITE_ALLOCATE);
      inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
      inst->dst = dst_reg(MRF, base_mrf);
      inst->src[0] = this->temp;
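Across all of these hunks the mechanical change is the same: free-standing emit(OP(...)) calls become methods on a builder object, bld, and the helpers keep returning the constructed vec4_instruction so call sites can still patch fields such as predicate, base_mrf or force_writemask_all. A rough sketch of the interface these call sites imply; it is inferred from usage here, not copied from brw_ir_visitor.h:

   /* Inferred-from-usage sketch of the builder interface; the real
    * declarations live in brw_ir_visitor.h, which is not shown here.
    */
   class builder_sketch {
   public:
      /* Tag subsequently emitted instructions for debug dumps. */
      void set_annotation(const char *annotation);

      /* Null destination: the result is dropped, only flags survive. */
      dst_reg reg_null_d();

      /* ALU helpers build, append and return the instruction. */
      vec4_instruction *MOV(const dst_reg &dst, const src_reg &src);
      vec4_instruction *ADD(const dst_reg &dst, const src_reg &src0,
                            const src_reg &src1);
      vec4_instruction *CMP(const dst_reg &dst, const src_reg &src0,
                            const src_reg &src1,
                            enum brw_conditional_mod cond);
      vec4_instruction *IF(enum brw_predicate predicate);

      /* Generic emit for opcodes without a dedicated helper; overloads
       * taking destination/source operands appear at call sites above.
       */
      vec4_instruction *emit(enum opcode op);
   };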
@@ -339,12 +339,12 @@ gen6_gs_visitor::emit_thread_end()
    * points because in the point case we set PrimEnd on all vertices.
    */
   if (c->gp->program.OutputType != GL_POINTS) {
-      emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
-      emit(IF(BRW_PREDICATE_NORMAL));
+      bld.CMP(bld.reg_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z);
+      bld.IF(BRW_PREDICATE_NORMAL);
      {
         visit((ir_end_primitive *) NULL);
      }
-      emit(BRW_OPCODE_ENDIF);
+      bld.emit(BRW_OPCODE_ENDIF);
   }
 
   /* Here we have to:
@@ -367,38 +367,38 @@ gen6_gs_visitor::emit_thread_end()
   int max_usable_mrf = 13;
 
   /* Issue the FF_SYNC message and obtain the initial VUE handle.
    */
-   emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G));
-   emit(IF(BRW_PREDICATE_NORMAL));
+   bld.CMP(bld.reg_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G);
+   bld.IF(BRW_PREDICATE_NORMAL);
   {
-      this->current_annotation = "gen6 thread end: ff_sync";
+      bld.set_annotation("gen6 thread end: ff_sync");
 
      vec4_instruction *inst;
      if (c->prog_data.gen6_xfb_enabled) {
        src_reg sol_temp(this, glsl_type::uvec4_type);
-        emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
+        bld.emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
                dst_reg(this->svbi),
                this->vertex_count,
                this->prim_count,
                sol_temp);
-        inst = emit(GS_OPCODE_FF_SYNC,
+        inst = bld.emit(GS_OPCODE_FF_SYNC,
                    dst_reg(this->temp), this->prim_count, this->svbi);
      } else {
-        inst = emit(GS_OPCODE_FF_SYNC,
+        inst = bld.emit(GS_OPCODE_FF_SYNC,
                    dst_reg(this->temp), this->prim_count, brw_imm_ud(0u));
      }
      inst->base_mrf = base_mrf;
 
      /* Loop over all buffered vertices and emit URB write messages */
-      this->current_annotation = "gen6 thread end: urb writes init";
+      bld.set_annotation("gen6 thread end: urb writes init");
      src_reg vertex(this, glsl_type::uint_type);
-      emit(MOV(dst_reg(vertex), 0u));
-      emit(MOV(dst_reg(this->vertex_output_offset), 0u));
+      bld.MOV(dst_reg(vertex), 0u);
+      bld.MOV(dst_reg(this->vertex_output_offset), 0u);
 
-      this->current_annotation = "gen6 thread end: urb writes";
-      emit(BRW_OPCODE_DO);
+      bld.set_annotation("gen6 thread end: urb writes");
+      bld.emit(BRW_OPCODE_DO);
      {
-         emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
-         inst = emit(BRW_OPCODE_BREAK);
+         bld.CMP(bld.reg_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE);
+         inst = bld.emit(BRW_OPCODE_BREAK);
        inst->predicate = BRW_PREDICATE_NORMAL;
 
        /* First we prepare the message header */
@@ -417,7 +417,7 @@ gen6_gs_visitor::emit_thread_end()
 
        for (; slot < prog_data->vue_map.num_slots; ++slot) {
           int varying = prog_data->vue_map.slot_to_varying[slot];
-           current_annotation = output_reg_annotation[varying];
+           bld.set_annotation(output_reg_annotation[varying]);
 
          /* Compute offset of this slot for the current vertex
          * in vertex_output
@@ -431,12 +431,12 @@ gen6_gs_visitor::emit_thread_end()
         dst_reg reg = dst_reg(MRF, mrf);
         reg.type = output_reg[varying].type;
         data.type = reg.type;
-         vec4_instruction *inst = emit(MOV(reg, data));
+         vec4_instruction *inst = bld.MOV(reg, data);
         inst->force_writemask_all = true;
         mrf++;
 
-         emit(ADD(dst_reg(this->vertex_output_offset),
-                  this->vertex_output_offset, 1u));
+         bld.ADD(dst_reg(this->vertex_output_offset),
+                 this->vertex_output_offset, 1u);
 
         /* If this was max_usable_mrf, we can't fit anything more into
          * this URB WRITE.
@@ -455,17 +455,17 @@ gen6_gs_visitor::emit_thread_end()
       * to the first data item of the next vertex, so that we can start
       * writing the next vertex.
       */
-      emit(ADD(dst_reg(this->vertex_output_offset),
-               this->vertex_output_offset, 1u));
+      bld.ADD(dst_reg(this->vertex_output_offset),
+              this->vertex_output_offset, 1u);
 
-      emit(ADD(dst_reg(vertex), vertex, 1u));
+      bld.ADD(dst_reg(vertex), vertex, 1u);
      }
-      emit(BRW_OPCODE_WHILE);
+      bld.emit(BRW_OPCODE_WHILE);
 
      if (c->prog_data.gen6_xfb_enabled)
        xfb_write();
   }
-   emit(BRW_OPCODE_ENDIF);
+   bld.emit(BRW_OPCODE_ENDIF);
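The buffered-vertex loop above also shows how counted loops are spelled in this IR: DO/WHILE carries no condition of its own, so the body opens with a CMP that sets the flags and a BREAK predicated on them. The skeleton, condensed from the hunk above:

   /* Counted-loop skeleton as used in emit_thread_end() above. */
   src_reg vertex(this, glsl_type::uint_type);
   bld.MOV(dst_reg(vertex), 0u);

   bld.emit(BRW_OPCODE_DO);
   {
      bld.CMP(bld.reg_null_d(), vertex, this->vertex_count,
              BRW_CONDITIONAL_GE);
      vec4_instruction *brk = bld.emit(BRW_OPCODE_BREAK);
      brk->predicate = BRW_PREDICATE_NORMAL;  /* exit once vertex >= count */

      /* ... emit URB writes for the current buffered vertex ... */

      bld.ADD(dst_reg(vertex), vertex, 1u);
   }
   bld.emit(BRW_OPCODE_WHILE);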
 
   /* Finally, emit EOT message.
    *
@@ -482,17 +482,17 @@ gen6_gs_visitor::emit_thread_end()
    * which works for both cases by setting the COMPLETE and UNUSED flags in
    * the EOT message.
    */
-   this->current_annotation = "gen6 thread end: EOT";
+   bld.set_annotation("gen6 thread end: EOT");
 
   if (c->prog_data.gen6_xfb_enabled) {
      /* When emitting EOT, set SONumPrimsWritten Increment Value. */
      src_reg data(this, glsl_type::uint_type);
-      emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu)));
-      emit(SHL(dst_reg(data), data, brw_imm_ud(16u)));
-      emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
+      bld.AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu));
+      bld.SHL(dst_reg(data), data, brw_imm_ud(16u));
+      bld.emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
   }
 
-   vec4_instruction *inst = emit(GS_OPCODE_THREAD_END);
+   vec4_instruction *inst = bld.emit(GS_OPCODE_THREAD_END);
   inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
   inst->base_mrf = base_mrf;
   inst->mlen = 1;
@@ -610,10 +610,10 @@ gen6_gs_visitor::xfb_write()
      unreachable("Unexpected primitive type in Gen6 SOL program.");
   }
 
-   this->current_annotation = "gen6 thread end: svb writes init";
+   bld.set_annotation("gen6 thread end: svb writes init");
 
-   emit(MOV(dst_reg(this->vertex_output_offset), 0u));
-   emit(MOV(dst_reg(this->sol_prim_written), 0u));
+   bld.MOV(dst_reg(this->vertex_output_offset), 0u);
+   bld.MOV(dst_reg(this->sol_prim_written), 0u);
 
   /* Check that at least one primitive can be written
    *
@@ -624,37 +624,37 @@ gen6_gs_visitor::xfb_write()
    * transform feedback is in interleaved or separate attribs mode.
    */
   src_reg sol_temp(this, glsl_type::uvec4_type);
-   emit(ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts)));
+   bld.ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts));
 
   /* Compare SVBI calculated number with the maximum value, which is
    * in R1.4 (previously saved in this->max_svbi) for gen6.
    */
-   emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
-   emit(IF(BRW_PREDICATE_NORMAL));
+   bld.CMP(bld.reg_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE);
+   bld.IF(BRW_PREDICATE_NORMAL);
   {
-      struct src_reg destination_indices_uw =
+      src_reg destination_indices_uw =
        retype(destination_indices, BRW_REGISTER_TYPE_UW);
-      vec4_instruction *inst = emit(MOV(dst_reg(destination_indices_uw),
-                                        brw_imm_v(0x00020100))); /* (0, 1, 2) */
+      vec4_instruction *inst = bld.MOV(dst_reg(destination_indices_uw),
+                                       brw_imm_v(0x00020100)); /* (0, 1, 2) */
      inst->force_writemask_all = true;
 
-      emit(ADD(dst_reg(this->destination_indices),
+      bld.ADD(dst_reg(this->destination_indices),
              this->destination_indices,
-               this->svbi));
+               this->svbi);
   }
-   emit(BRW_OPCODE_ENDIF);
+   bld.emit(BRW_OPCODE_ENDIF);
 
   /* Write transform feedback data for all processed vertices. */
   for (int i = 0; i < c->gp->program.VerticesOut; i++) {
-      emit(MOV(dst_reg(sol_temp), i));
-      emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
-               BRW_CONDITIONAL_L));
-      emit(IF(BRW_PREDICATE_NORMAL));
+      bld.MOV(dst_reg(sol_temp), i);
+      bld.CMP(bld.reg_null_d(), sol_temp, this->vertex_count,
+              BRW_CONDITIONAL_L);
+      bld.IF(BRW_PREDICATE_NORMAL);
      {
        xfb_program(i, num_verts);
      }
-      emit(BRW_OPCODE_ENDIF);
+      bld.emit(BRW_OPCODE_ENDIF);
   }
 }
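The overflow guard at the top of the next hunk reads most naturally as an inequality: a primitive is written out only when (sol_prim_written + 1) * num_verts + svbi <= max_svbi, i.e. only when the destination indices consumed by every vertex of the candidate primitive still fit under the SVBI limit saved in max_svbi. The three ALU instructions compute the left-hand side into sol_temp and the CMP/IF pair applies the test.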
@@ -670,16 +670,16 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
 
   /* Check for buffer overflow: we need room to write the complete primitive
    * (all vertices). Otherwise, avoid writing any vertices for it */
-   emit(ADD(dst_reg(sol_temp), this->sol_prim_written, 1u));
-   emit(MUL(dst_reg(sol_temp), sol_temp, brw_imm_ud(num_verts)));
-   emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi));
-   emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
-   emit(IF(BRW_PREDICATE_NORMAL));
+   bld.ADD(dst_reg(sol_temp), this->sol_prim_written, 1u);
+   bld.MUL(dst_reg(sol_temp), sol_temp, brw_imm_ud(num_verts));
+   bld.ADD(dst_reg(sol_temp), sol_temp, this->svbi);
+   bld.CMP(bld.reg_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE);
+   bld.IF(BRW_PREDICATE_NORMAL);
   {
      /* Avoid overwriting MRF 1 as it is used as URB write message header */
      dst_reg mrf_reg(MRF, 2);
 
-      this->current_annotation = "gen6: emit SOL vertex data";
+      bld.set_annotation("gen6: emit SOL vertex data");
      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
@@ -688,7 +688,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
           prog_data->transform_feedback_bindings[binding];
 
        /* Set up the correct destination index for this vertex */
-         vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
+         vec4_instruction *inst = bld.emit(GS_OPCODE_SVB_SET_DST_INDEX,
                                       mrf_reg,
                                       this->destination_indices);
        inst->sol_vertex = vertex % num_verts;
@@ -705,11 +705,11 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
        /* Compute offset of this varying for the current vertex
         * in vertex_output
         */
-         this->current_annotation = output_reg_annotation[varying];
+         bld.set_annotation(output_reg_annotation[varying]);
        src_reg data(this->vertex_output);
        data.reladdr = ralloc(mem_ctx, src_reg);
        int offset = get_vertex_output_offset_for_varying(vertex, varying);
-         emit(MOV(dst_reg(this->vertex_output_offset), offset));
+         bld.MOV(dst_reg(this->vertex_output_offset), offset);
        memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
        data.type = output_reg[varying].type;
 
@@ -726,7 +726,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
          data.swizzle = prog_data->transform_feedback_swizzles[binding];
 
        /* Write data */
-         inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
+         inst = bld.emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
        inst->sol_binding = binding;
        inst->sol_final_write = final_write;
 
@@ -734,17 +734,17 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
          /* This is the last vertex of the primitive, then increment
           * SO num primitive counter and destination indices.
           */
-            emit(ADD(dst_reg(this->destination_indices),
+            bld.ADD(dst_reg(this->destination_indices),
                  this->destination_indices,
-                     brw_imm_ud(num_verts)));
-            emit(ADD(dst_reg(this->sol_prim_written),
-                     this->sol_prim_written, 1u));
+                     brw_imm_ud(num_verts));
+            bld.ADD(dst_reg(this->sol_prim_written),
+                    this->sol_prim_written, 1u);
        }
      }
-      this->current_annotation = NULL;
+      bld.set_annotation(NULL);
   }
-   emit(BRW_OPCODE_ENDIF);
+   bld.emit(BRW_OPCODE_ENDIF);
 }
 
 int