author    Francisco Jerez <currojerez@riseup.net>  2014-10-28 15:59:34 +0200
committer Francisco Jerez <currojerez@riseup.net>  2014-10-30 16:39:53 +0200
commit    a841b3c0cb61b11f993eaa52e75ae72daa4d5fa4 (patch)
tree      859e614042badaee0feeb510f6f3fbc089ccb421
parent    d46cf50e4ce13b478544de223ec64302ab832d59 (diff)
i965: Unify most of the visiting code in the VEC4 and FS visitors. [i965-unified-visitor]
The VEC4 and FS visitor classes are still huge and there's still a lot that could be unified -- most of what is left doesn't have much to do with visiting, though.
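
The pattern this commit converges on -- both visitors deriving from a shared, builder-parameterized base and emitting instructions through a builder object instead of per-visitor emit()/ALU wrappers -- can be sketched roughly as follows. This is a hand-written illustration pieced together from the hunks below; the exact brw::fs_builder API lives in the new brw_ir_visitor.h and may differ in detail:

    // Shared CRTP base replaces the old monolithic backend_visitor:
    class fs_visitor : public brw::backend_visitor<fs_visitor, brw::fs_builder>
    { /* ... */ };

    // Instructions go through the builder member instead of emit() wrappers:
    bld.MOV(dst, src);                  // was: emit(MOV(dst, src));
    bld.ADD(wpos, pixel_x, fs_reg(0.5f));
    bld.emit(FS_OPCODE_LINTERP, attr, dx, dy, interp);

    // Builders can be repositioned to insert at an arbitrary point,
    // replacing inst->insert_before(block, new_inst):
    brw::fs_builder ibld = bld.at(block, inst);
    ibld.MOV(dst, tmp)->force_writemask_all = true;

    // Debug annotation state moves from visitor fields onto the builder:
    bld.set_annotation("compute sample position");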
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_cfg.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_cfg.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h24
-rw-r--r--src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_dead_control_flow.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp926
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h222
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp5
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_cse.cpp19
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_fp.cpp154
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp91
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp61
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp2337
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_fs.h13
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_vec4.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_visitor.cpp190
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_visitor.h2353
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c29
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_reg.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp36
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp118
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h54
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp195
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h268
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_cse.cpp9
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp12
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp132
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp3071
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_vp.cpp145
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp48
-rw-r--r--src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp212
36 files changed, 3760 insertions, 6993 deletions
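One notable piece of the diff below is the shader_time rework in brw_context.h: the per-stage ST_*_WRITTEN/ST_*_RESET variants collapse into a single shader_time_shader_entry enum, and each stage keeps a block of ST_NUM_ENTRIES slots starting at its base value. On that reading (hedged -- not every updated call site is visible in this patch), a record would be addressed as stage base plus entry:

    /* Hypothetical illustration of the new indexing scheme: */
    enum shader_time_shader_type fs8_written =
       (enum shader_time_shader_type)(ST_FS8 + ST_WRITTEN); /* "written" slot */
    enum shader_time_shader_type fs8_reset =
       (enum shader_time_shader_type)(ST_FS8 + ST_RESET);   /* "reset" slot */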
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 9c006daa0e3..d61193f8970 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -70,6 +70,7 @@ i965_FILES = \
brw_gs_state.c \
brw_gs_surface_state.c \
brw_interpolation_map.c \
+ brw_ir_visitor.cpp \
brw_lower_texture_gradients.cpp \
brw_lower_unnormalized_offset.cpp \
brw_meta_updownsample.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index bb49a0ae955..7af127f5fee 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -143,7 +143,7 @@ bblock_t::combine_with(bblock_t *that)
}
void
-bblock_t::dump(backend_visitor *v) const
+bblock_t::dump(brw::base_visitor *v) const
{
int ip = this->start_ip;
foreach_inst_in_block(backend_instruction, inst, this) {
@@ -422,7 +422,7 @@ cfg_t::make_block_array()
}
void
-cfg_t::dump(backend_visitor *v) const
+cfg_t::dump(brw::base_visitor *v) const
{
foreach_block (block, this) {
fprintf(stderr, "START B%d", block->num);
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
index c06ed61a79f..6e27027e41a 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -31,6 +31,10 @@
#include "brw_shader.h"
+#ifdef __cplusplus
+#include "brw_ir_visitor.h"
+#endif
+
struct bblock_t;
struct bblock_link {
@@ -60,7 +64,7 @@ struct bblock_t {
bool is_successor_of(const bblock_t *block) const;
bool can_combine_with(const bblock_t *that) const;
void combine_with(bblock_t *that);
- void dump(backend_visitor *v) const;
+ void dump(brw::base_visitor *v) const;
backend_instruction *start();
const backend_instruction *start() const;
@@ -204,7 +208,7 @@ struct cfg_t {
void set_next_block(bblock_t **cur, bblock_t *block, int ip);
void make_block_array();
- void dump(backend_visitor *v) const;
+ void dump(brw::base_visitor *v) const;
#endif
void *mem_ctx;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 884e28bf8b4..4a1ffdc5b8a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -757,20 +757,20 @@ struct brw_tracked_state {
void (*emit)( struct brw_context *brw );
};
+enum shader_time_shader_entry {
+ ST_BASE,
+ ST_WRITTEN,
+ ST_RESET,
+ ST_SUM,
+ ST_NUM_ENTRIES
+};
+
enum shader_time_shader_type {
ST_NONE,
- ST_VS,
- ST_VS_WRITTEN,
- ST_VS_RESET,
- ST_GS,
- ST_GS_WRITTEN,
- ST_GS_RESET,
- ST_FS8,
- ST_FS8_WRITTEN,
- ST_FS8_RESET,
- ST_FS16,
- ST_FS16_WRITTEN,
- ST_FS16_RESET,
+ ST_VS = ST_NONE + ST_NUM_ENTRIES,
+ ST_GS = ST_VS + ST_NUM_ENTRIES,
+ ST_FS8 = ST_GS + ST_NUM_ENTRIES,
+ ST_FS16 = ST_FS8 + ST_NUM_ENTRIES
};
/* Flags for brw->state.cache.
diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
index 4c9d7b95db8..be66c9efcb4 100644
--- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
+++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
@@ -36,7 +36,7 @@
* - if/else/endif
*/
bool
-dead_control_flow_eliminate(backend_visitor *v)
+dead_control_flow_eliminate(brw::base_visitor *v)
{
bool progress = false;
diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.h b/src/mesa/drivers/dri/i965/brw_dead_control_flow.h
index 57a4dabc83c..1824fb98c33 100644
--- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.h
+++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.h
@@ -23,4 +23,4 @@
#include "brw_shader.h"
-bool dead_control_flow_eliminate(backend_visitor *v);
+bool dead_control_flow_eliminate(brw::base_visitor *v);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 2943f042dd0..2cf2294960b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -124,7 +124,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
case GRF:
case HW_REG:
case MRF:
- this->regs_written = (dst.width * dst.stride * type_sz(dst.type) + 31) / 32;
+ this->regs_written = (MAX2(dst.width * dst.stride, 1) *
+ type_sz(dst.type) + 31) / 32;
break;
case BAD_FILE:
this->regs_written = 0;
@@ -228,7 +229,7 @@ fs_inst::resize_sources(uint8_t num_sources)
if (this->sources != num_sources) {
fs_reg *src = new fs_reg[MAX2(num_sources, 3)];
- for (unsigned i = 0; i < MIN2(this->sources, num_sources); ++i)
+ for (int i = 0; i < MIN2(this->sources, num_sources); ++i)
src[i] = this->src[i];
delete[] this->src;
@@ -237,236 +238,6 @@ fs_inst::resize_sources(uint8_t num_sources)
}
}
-#define ALU1(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0) \
- { \
- return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0); \
- }
-
-#define ALU2(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \
- const fs_reg &src1) \
- { \
- return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \
- }
-
-#define ALU2_ACC(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \
- const fs_reg &src1) \
- { \
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\
- inst->writes_accumulator = true; \
- return inst; \
- }
-
-#define ALU3(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \
- const fs_reg &src1, const fs_reg &src2) \
- { \
- return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
- }
-
-ALU1(NOT)
-ALU1(MOV)
-ALU1(FRC)
-ALU1(RNDD)
-ALU1(RNDE)
-ALU1(RNDZ)
-ALU2(ADD)
-ALU2(MUL)
-ALU2_ACC(MACH)
-ALU2(AND)
-ALU2(OR)
-ALU2(XOR)
-ALU2(SHL)
-ALU2(SHR)
-ALU2(ASR)
-ALU3(LRP)
-ALU1(BFREV)
-ALU3(BFE)
-ALU2(BFI1)
-ALU3(BFI2)
-ALU1(FBH)
-ALU1(FBL)
-ALU1(CBIT)
-ALU3(MAD)
-ALU2_ACC(ADDC)
-ALU2_ACC(SUBB)
-ALU2(SEL)
-ALU2(MAC)
-
-/** Gen4 predicated IF. */
-fs_inst *
-fs_visitor::IF(enum brw_predicate predicate)
-{
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width);
- inst->predicate = predicate;
- return inst;
-}
-
-/** Gen6 IF with embedded comparison. */
-fs_inst *
-fs_visitor::IF(const fs_reg &src0, const fs_reg &src1,
- enum brw_conditional_mod condition)
-{
- assert(brw->gen == 6);
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width,
- reg_null_d, src0, src1);
- inst->conditional_mod = condition;
- return inst;
-}
-
-/**
- * CMP: Sets the low bit of the destination channels with the result
- * of the comparison, while the upper bits are undefined, and updates
- * the flag register with the packed 16 bits of the result.
- */
-fs_inst *
-fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1,
- enum brw_conditional_mod condition)
-{
- fs_inst *inst;
-
- /* Take the instruction:
- *
- * CMP null<d> src0<f> src1<f>
- *
- * Original gen4 does type conversion to the destination type before
- * comparison, producing garbage results for floating point comparisons.
- * gen5 does the comparison on the execution type (resolved source types),
- * so dst type doesn't matter. gen6 does comparison and then uses the
- * result as if it was the dst type with no conversion, which happens to
- * mostly work out for float-interpreted-as-int since our comparisons are
- * for >0, =0, <0.
- */
- if (brw->gen == 4) {
- dst.type = src0.type;
- if (dst.file == HW_REG)
- dst.fixed_hw_reg.type = dst.type;
- }
-
- resolve_ud_negate(&src0);
- resolve_ud_negate(&src1);
-
- inst = new(mem_ctx) fs_inst(BRW_OPCODE_CMP, dst, src0, src1);
- inst->conditional_mod = condition;
-
- return inst;
-}
-
-fs_inst *
-fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources)
-{
- uint8_t exec_size = dst.width;
- for (int i = 0; i < sources; ++i) {
- assert(src[i].width % dst.width == 0);
- if (src[i].width > exec_size)
- exec_size = src[i].width;
- }
-
- fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, exec_size,
- dst, src, sources);
- inst->regs_written = 0;
- for (int i = 0; i < sources; ++i) {
- /* The LOAD_PAYLOAD instruction only really makes sense if we are
- * dealing with whole registers. If this ever changes, we can deal
- * with it later.
- */
- int size = src[i].effective_width * type_sz(src[i].type);
- assert(size % 32 == 0);
- inst->regs_written += (size + 31) / 32;
- }
-
- return inst;
-}
-
-exec_list
-fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
- const fs_reg &surf_index,
- const fs_reg &varying_offset,
- uint32_t const_offset)
-{
- exec_list instructions;
- fs_inst *inst;
-
- /* We have our constant surface use a pitch of 4 bytes, so our index can
- * be any component of a vector, and then we load 4 contiguous
- * components starting from that.
- *
- * We break down the const_offset to a portion added to the variable
- * offset and a portion done using reg_offset, which means that if you
- * have GLSL using something like "uniform vec4 a[20]; gl_FragColor =
- * a[i]", we'll temporarily generate 4 vec4 loads from offset i * 4, and
- * CSE can later notice that those loads are all the same and eliminate
- * the redundant ones.
- */
- fs_reg vec4_offset = fs_reg(this, glsl_type::int_type);
- instructions.push_tail(ADD(vec4_offset,
- varying_offset, fs_reg(const_offset & ~3)));
-
- int scale = 1;
- if (brw->gen == 4 && dst.width == 8) {
- /* Pre-gen5, we can either use a SIMD8 message that requires (header,
- * u, v, r) as parameters, or we can just use the SIMD16 message
- * consisting of (header, u). We choose the second, at the cost of a
- * longer return length.
- */
- scale = 2;
- }
-
- enum opcode op;
- if (brw->gen >= 7)
- op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
- else
- op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
-
- assert(dst.width % 8 == 0);
- int regs_written = 4 * (dst.width / 8) * scale;
- fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written),
- dst.type, dst.width);
- inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset);
- inst->regs_written = regs_written;
- instructions.push_tail(inst);
-
- if (brw->gen < 7) {
- inst->base_mrf = 13;
- inst->header_present = true;
- if (brw->gen == 4)
- inst->mlen = 3;
- else
- inst->mlen = 1 + dispatch_width / 8;
- }
-
- fs_reg result = offset(vec4_result, (const_offset & 3) * scale);
- instructions.push_tail(MOV(dst, result));
-
- return instructions;
-}
-
-/**
- * A helper for MOV generation for fixing up broken hardware SEND dependency
- * handling.
- */
-fs_inst *
-fs_visitor::DEP_RESOLVE_MOV(int grf)
-{
- fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
-
- inst->ir = NULL;
- inst->annotation = "send dependency resolve";
-
- /* The caller always wants uncompressed to emit the minimal extra
- * dependencies, and to avoid having to deal with aligning its regs to 2.
- */
- inst->exec_size = 8;
-
- return inst;
-}
-
bool
fs_inst::equals(fs_inst *inst) const
{
@@ -632,186 +403,6 @@ fs_reg::is_contiguous() const
return stride == 1;
}
-bool
-fs_reg::is_valid_3src() const
-{
- return file == GRF || file == UNIFORM;
-}
-
-int
-fs_visitor::type_size(const struct glsl_type *type)
-{
- unsigned int size, i;
-
- switch (type->base_type) {
- case GLSL_TYPE_UINT:
- case GLSL_TYPE_INT:
- case GLSL_TYPE_FLOAT:
- case GLSL_TYPE_BOOL:
- return type->components();
- case GLSL_TYPE_ARRAY:
- return type_size(type->fields.array) * type->length;
- case GLSL_TYPE_STRUCT:
- size = 0;
- for (i = 0; i < type->length; i++) {
- size += type_size(type->fields.structure[i].type);
- }
- return size;
- case GLSL_TYPE_SAMPLER:
- /* Samplers take up no register space, since they're baked in at
- * link time.
- */
- return 0;
- case GLSL_TYPE_ATOMIC_UINT:
- return 0;
- case GLSL_TYPE_IMAGE:
- case GLSL_TYPE_VOID:
- case GLSL_TYPE_ERROR:
- case GLSL_TYPE_INTERFACE:
- unreachable("not reached");
- }
-
- return 0;
-}
-
-fs_reg
-fs_visitor::get_timestamp()
-{
- assert(brw->gen >= 7);
-
- fs_reg ts = fs_reg(retype(brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_TIMESTAMP,
- 0),
- BRW_REGISTER_TYPE_UD));
-
- fs_reg dst = fs_reg(this, glsl_type::uint_type);
-
- fs_inst *mov = emit(MOV(dst, ts));
- /* We want to read the 3 fields we care about (mostly field 0, but also 2)
- * even if it's not enabled in the dispatch.
- */
- mov->force_writemask_all = true;
- mov->exec_size = 8;
-
- /* The caller wants the low 32 bits of the timestamp. Since it's running
- * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
- * which is plenty of time for our purposes. It is identical across the
- * EUs, but since it's tracking GPU core speed it will increment at a
- * varying rate as render P-states change.
- *
- * The caller could also check if render P-states have changed (or anything
- * else that might disrupt timing) by setting smear to 2 and checking if
- * that field is != 0.
- */
- dst.set_smear(0);
-
- return dst;
-}
-
-void
-fs_visitor::emit_shader_time_begin()
-{
- current_annotation = "shader time start";
- shader_start_time = get_timestamp();
-}
-
-void
-fs_visitor::emit_shader_time_end()
-{
- current_annotation = "shader time end";
-
- enum shader_time_shader_type type, written_type, reset_type;
- if (dispatch_width == 8) {
- type = ST_FS8;
- written_type = ST_FS8_WRITTEN;
- reset_type = ST_FS8_RESET;
- } else {
- assert(dispatch_width == 16);
- type = ST_FS16;
- written_type = ST_FS16_WRITTEN;
- reset_type = ST_FS16_RESET;
- }
-
- fs_reg shader_end_time = get_timestamp();
-
- /* Check that there weren't any timestamp reset events (assuming these
- * were the only two timestamp reads that happened).
- */
- fs_reg reset = shader_end_time;
- reset.set_smear(2);
- fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u)));
- test->conditional_mod = BRW_CONDITIONAL_Z;
- emit(IF(BRW_PREDICATE_NORMAL));
-
- push_force_uncompressed();
- fs_reg start = shader_start_time;
- start.negate = true;
- fs_reg diff = fs_reg(this, glsl_type::uint_type);
- emit(ADD(diff, start, shader_end_time));
-
- /* If there were no instructions between the two timestamp gets, the diff
- * is 2 cycles. Remove that overhead, so I can forget about that when
- * trying to determine the time taken for single instructions.
- */
- emit(ADD(diff, diff, fs_reg(-2u)));
-
- emit_shader_time_write(type, diff);
- emit_shader_time_write(written_type, fs_reg(1u));
- emit(BRW_OPCODE_ELSE);
- emit_shader_time_write(reset_type, fs_reg(1u));
- emit(BRW_OPCODE_ENDIF);
-
- pop_force_uncompressed();
-}
-
-void
-fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
- fs_reg value)
-{
- int shader_time_index =
- brw_get_shader_time_index(brw, shader_prog, prog, type);
- fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
-
- fs_reg payload;
- if (dispatch_width == 8)
- payload = fs_reg(this, glsl_type::uvec2_type);
- else
- payload = fs_reg(this, glsl_type::uint_type);
-
- emit(new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
- fs_reg(), payload, offset, value));
-}
-
-void
-fs_visitor::vfail(const char *format, va_list va)
-{
- char *msg;
-
- if (failed)
- return;
-
- failed = true;
-
- msg = ralloc_vasprintf(mem_ctx, format, va);
- msg = ralloc_asprintf(mem_ctx, "FS compile failed: %s\n", msg);
-
- this->fail_msg = msg;
-
- if (INTEL_DEBUG & DEBUG_WM) {
- fprintf(stderr, "%s", msg);
- }
-}
-
-void
-fs_visitor::fail(const char *format, ...)
-{
- va_list va;
-
- va_start(va, format);
- vfail(format, va);
- va_end(va);
-}
-
/**
* Mark this program as impossible to compile in SIMD16 mode.
*
@@ -844,58 +435,6 @@ fs_visitor::no16(const char *format, ...)
va_end(va);
}
-fs_inst *
-fs_visitor::emit(enum opcode opcode)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dispatch_width));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src0));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1, const fs_reg &src2)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1, src2));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst,
- fs_reg src[], int sources)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src, sources));
-}
-
-void
-fs_visitor::push_force_uncompressed()
-{
- force_uncompressed_stack++;
-}
-
-void
-fs_visitor::pop_force_uncompressed()
-{
- force_uncompressed_stack--;
- assert(force_uncompressed_stack >= 0);
-}
-
/**
* Returns true if the instruction has a flag that means it won't
* update an entire destination register.
@@ -958,67 +497,6 @@ fs_inst::writes_flag() const
opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS;
}
-/**
- * Returns how many MRFs an FS opcode will write over.
- *
- * Note that this is not the 0 or 1 implied writes in an actual gen
- * instruction -- the FS opcodes often generate MOVs in addition.
- */
-int
-fs_visitor::implied_mrf_writes(fs_inst *inst)
-{
- if (inst->mlen == 0)
- return 0;
-
- if (inst->base_mrf == -1)
- return 0;
-
- switch (inst->opcode) {
- case SHADER_OPCODE_RCP:
- case SHADER_OPCODE_RSQ:
- case SHADER_OPCODE_SQRT:
- case SHADER_OPCODE_EXP2:
- case SHADER_OPCODE_LOG2:
- case SHADER_OPCODE_SIN:
- case SHADER_OPCODE_COS:
- return 1 * dispatch_width / 8;
- case SHADER_OPCODE_POW:
- case SHADER_OPCODE_INT_QUOTIENT:
- case SHADER_OPCODE_INT_REMAINDER:
- return 2 * dispatch_width / 8;
- case SHADER_OPCODE_TEX:
- case FS_OPCODE_TXB:
- case SHADER_OPCODE_TXD:
- case SHADER_OPCODE_TXF:
- case SHADER_OPCODE_TXF_CMS:
- case SHADER_OPCODE_TXF_MCS:
- case SHADER_OPCODE_TG4:
- case SHADER_OPCODE_TG4_OFFSET:
- case SHADER_OPCODE_TXL:
- case SHADER_OPCODE_TXS:
- case SHADER_OPCODE_LOD:
- return 1;
- case FS_OPCODE_FB_WRITE:
- return 2;
- case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
- case SHADER_OPCODE_GEN4_SCRATCH_READ:
- return 1;
- case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
- return inst->mlen;
- case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- return 2;
- case SHADER_OPCODE_UNTYPED_ATOMIC:
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- case FS_OPCODE_INTERPOLATE_AT_CENTROID:
- case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
- case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
- case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
- return 0;
- default:
- unreachable("not reached");
- }
-}
-
/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int reg)
{
@@ -1078,12 +556,6 @@ fs_reg::fs_reg(fs_visitor *v, const struct glsl_type *type)
assert(this->width == 8 || this->width == 16);
}
-fs_reg *
-fs_visitor::variable_storage(ir_variable *var)
-{
- return (fs_reg *)hash_table_find(this->variable_ht, var);
-}
-
void
import_uniforms_callback(const void *key,
void *data,
@@ -1110,82 +582,102 @@ fs_visitor::import_uniforms(fs_visitor *v)
this->push_constant_loc = v->push_constant_loc;
this->pull_constant_loc = v->pull_constant_loc;
this->uniforms = v->uniforms;
- this->param_size = v->param_size;
+ this->uniform_size = v->uniform_size;
}
-/* Our support for uniforms is piggy-backed on the struct
- * gl_fragment_program, because that's where the values actually
- * get stored, rather than in some global gl_shader_program uniform
- * store.
+/**
+ * A helper for MOV generation for fixing up broken hardware SEND dependency
+ * handling.
*/
-void
-fs_visitor::setup_uniform_values(ir_variable *ir)
+fs_inst *
+fs_visitor::DEP_RESOLVE_MOV(int grf)
{
- int namelen = strlen(ir->name);
+ fs_inst *inst = bld.MOV(brw_null_reg(),
+ fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
- /* The data for our (non-builtin) uniforms is stored in a series of
- * gl_uniform_driver_storage structs for each subcomponent that
- * glGetUniformLocation() could name. We know it's been set up in the same
- * order we'd walk the type, so walk the list of storage and find anything
- * with our name, or the prefix of a component that starts with our name.
+ inst->ir = NULL;
+ inst->annotation = "send dependency resolve";
+
+ /* The caller always wants uncompressed to emit the minimal extra
+ * dependencies, and to avoid having to deal with aligning its regs to 2.
*/
- unsigned params_before = uniforms;
- for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
- struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
- if (strncmp(ir->name, storage->name, namelen) != 0 ||
- (storage->name[namelen] != 0 &&
- storage->name[namelen] != '.' &&
- storage->name[namelen] != '[')) {
- continue;
- }
+ inst->exec_size = 8;
- unsigned slots = storage->type->component_slots();
- if (storage->array_elements)
- slots *= storage->array_elements;
+ return inst;
+}
- for (unsigned i = 0; i < slots; i++) {
- stage_prog_data->param[uniforms++] = &storage->storage[i];
+void
+fs_visitor::emit_pull_constant_load(brw::fs_builder &bld,
+ const fs_reg &dst,
+ const fs_reg &surf_index,
+ uint32_t off,
+ const fs_reg *reladdr,
+ unsigned num_components)
+{
+ if (reladdr) {
+ /* We have our constant surface use a pitch of 4 bytes, so our index can
+ * be any component of a vector, and then we load 4 contiguous
+ * components starting from that.
+ */
+ fs_reg addr = bld.scalar_reg(BRW_REGISTER_TYPE_D);
+ bld.ADD(fs_reg(addr), *reladdr, fs_reg((off / 4) & ~3));
+
+ int scale = 1;
+ if (brw->gen == 4 && dst.width == 8) {
+ /* Pre-gen5, we can either use a SIMD8 message that requires (header,
+ * u, v, r) as parameters, or we can just use the SIMD16 message
+ * consisting of (header, u). We choose the second, at the cost of a
+ * longer return length.
+ */
+ scale = 2;
}
- }
- /* Make sure we actually initialized the right amount of stuff here. */
- assert(params_before + ir->type->component_slots() == uniforms);
- (void)params_before;
-}
+ enum opcode op;
+ if (brw->gen >= 7)
+ op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
+ else
+ op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
+ assert(dst.width % 8 == 0);
+ int regs_written = 4 * (dst.width / 8) * scale;
+ fs_reg result = bld.scalar_reg(dst.type, regs_written);
+ instruction *inst = bld.emit(op, result, surf_index, addr);
-/* Our support for builtin uniforms is even scarier than non-builtin.
- * It sits on top of the PROG_STATE_VAR parameters that are
- * automatically updated from GL context state.
- */
-void
-fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
-{
- const ir_state_slot *const slots = ir->get_state_slots();
- assert(slots != NULL);
+ inst->regs_written = regs_written;
- for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
- /* This state reference has already been setup by ir_to_mesa, but we'll
- * get the same index back here.
- */
- int index = _mesa_add_state_reference(this->prog->Parameters,
- (gl_state_index *)slots[i].tokens);
+ if (brw->gen < 7) {
+ inst->base_mrf = 13;
+ inst->header_present = true;
+ if (brw->gen == 4)
+ inst->mlen = 3;
+ else
+ inst->mlen = 1 + dispatch_width / 8;
+ }
- /* Add each of the unique swizzles of the element as a parameter.
- * This'll end up matching the expected layout of the
- * array/matrix/structure we're trying to fill in.
- */
- int last_swiz = -1;
- for (unsigned int j = 0; j < 4; j++) {
- int swiz = GET_SWZ(slots[i].swizzle, j);
- if (swiz == last_swiz)
- break;
- last_swiz = swiz;
+ for (unsigned i = 0; i < num_components; ++i)
+ bld.MOV(offset(dst, i), offset(fs_reg(result),
+ (((off / 4) & 3) + i) * scale));
- stage_prog_data->param[uniforms++] =
- &prog->Parameters->ParameterValues[index][swiz];
+ } else {
+ brw::fs_builder ubld = bld.force_uncompressed();
+ fs_reg result = bld.scalar_reg(dst.type);
+ fs_reg addr;
+
+ if (brw->gen >= 8) {
+ /* Store the offset in a GRF so we can send-from-GRF. */
+ addr = bld.scalar_reg(BRW_REGISTER_TYPE_D);
+ ubld.MOV(fs_reg(addr), fs_reg(off & ~15));
+ } else {
+ /* Immediates are fine on older generations since they'll be moved
+ * to a (potentially fake) MRF at the generator level.
+ */
+ addr = fs_reg(off & ~15);
}
+
+ ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, result, surf_index, addr);
+
+ for (unsigned i = 0; i < num_components; ++i)
+ bld.MOV(offset(dst, i), component(result, ((off / 4) & 3) + i));
}
}
@@ -1200,15 +692,15 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
/* gl_FragCoord.x */
if (ir->data.pixel_center_integer) {
- emit(MOV(wpos, this->pixel_x));
+ bld.MOV(wpos, this->pixel_x);
} else {
- emit(ADD(wpos, this->pixel_x, fs_reg(0.5f)));
+ bld.ADD(wpos, this->pixel_x, fs_reg(0.5f));
}
wpos = offset(wpos, 1);
/* gl_FragCoord.y */
if (!flip && ir->data.pixel_center_integer) {
- emit(MOV(wpos, this->pixel_y));
+ bld.MOV(wpos, this->pixel_y);
} else {
fs_reg pixel_y = this->pixel_y;
float offset = (ir->data.pixel_center_integer ? 0.0 : 0.5);
@@ -1218,15 +710,15 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
offset += key->drawable_height - 1.0;
}
- emit(ADD(wpos, pixel_y, fs_reg(offset)));
+ bld.ADD(wpos, pixel_y, fs_reg(offset));
}
wpos = offset(wpos, 1);
/* gl_FragCoord.z */
if (brw->gen >= 6) {
- emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
+ bld.MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)));
} else {
- emit(FS_OPCODE_LINTERP, wpos,
+ bld.emit(FS_OPCODE_LINTERP, wpos,
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
interp_reg(VARYING_SLOT_POS, 2));
@@ -1234,7 +726,7 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
wpos = offset(wpos, 1);
/* gl_FragCoord.w: Already set up in emit_interpolation */
- emit(BRW_OPCODE_MOV, wpos, this->wpos_w);
+ bld.emit(BRW_OPCODE_MOV, wpos, this->wpos_w);
return reg;
}
@@ -1269,7 +761,7 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
*/
barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
}
- return emit(FS_OPCODE_LINTERP, attr,
+ return bld.emit(FS_OPCODE_LINTERP, attr,
this->delta_x[barycoord_mode],
this->delta_y[barycoord_mode], interp);
}
@@ -1323,7 +815,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
struct brw_reg interp = interp_reg(location, k);
interp = suboffset(interp, 3);
interp.type = reg->type;
- emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
+ bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
attr = offset(attr, 1);
}
} else {
@@ -1336,7 +828,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
* unlit, replace the centroid data with non-centroid
* data.
*/
- emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+ bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
fs_inst *inst;
inst = emit_linterp(attr, fs_reg(interp), interpolation_mode,
@@ -1360,7 +852,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
ir->data.sample || key->persample_shading);
}
if (brw->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
- emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
+ bld.emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
}
attr = offset(attr, 1);
}
@@ -1393,7 +885,7 @@ fs_visitor::emit_frontfacing_interpolation()
fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
g0.negate = true;
- emit(ASR(*reg, g0, fs_reg(15)));
+ bld.ASR(*reg, g0, fs_reg(15));
} else {
/* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create
* a boolean result from this (1/true or 0/false).
@@ -1410,8 +902,8 @@ fs_visitor::emit_frontfacing_interpolation()
fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
g1_6.negate = true;
- emit(ASR(asr, g1_6, fs_reg(31)));
- emit(AND(*reg, asr, fs_reg(1)));
+ bld.ASR(asr, g1_6, fs_reg(31));
+ bld.AND(*reg, asr, fs_reg(1));
}
return reg;
@@ -1426,9 +918,9 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
if (key->compute_pos_offset) {
/* Convert int_sample_pos to floating point */
- emit(MOV(dst, int_sample_pos));
+ bld.MOV(dst, int_sample_pos);
/* Scale to the range [0, 1] */
- emit(MUL(dst, dst, fs_reg(1 / 16.0f)));
+ bld.MUL(dst, dst, fs_reg(1 / 16.0f));
}
else {
/* From ARB_sample_shading specification:
@@ -1436,7 +928,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
* rasterization is disabled, gl_SamplePosition will always be
* (0.5, 0.5).
*/
- emit(MOV(dst, fs_reg(0.5f)));
+ bld.MOV(dst, fs_reg(0.5f));
}
}
@@ -1445,7 +937,7 @@ fs_visitor::emit_samplepos_setup()
{
assert(brw->gen >= 6);
- this->current_annotation = "compute sample position";
+ bld.set_annotation("compute sample position");
fs_reg *reg = new(this->mem_ctx) fs_reg(this, glsl_type::vec2_type);
fs_reg pos = *reg;
fs_reg int_sample_x = fs_reg(this, glsl_type::int_type);
@@ -1467,21 +959,21 @@ fs_visitor::emit_samplepos_setup()
BRW_REGISTER_TYPE_B), 16, 8, 2);
if (dispatch_width == 8) {
- emit(MOV(int_sample_x, fs_reg(sample_pos_reg)));
+ bld.MOV(int_sample_x, fs_reg(sample_pos_reg));
} else {
- emit(MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg)));
- emit(MOV(half(int_sample_x, 1), fs_reg(suboffset(sample_pos_reg, 16))))
+ bld.MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg));
+ bld.MOV(half(int_sample_x, 1), fs_reg(suboffset(sample_pos_reg, 16)))
->force_sechalf = true;
}
/* Compute gl_SamplePosition.x */
compute_sample_position(pos, int_sample_x);
pos = offset(pos, 1);
if (dispatch_width == 8) {
- emit(MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1))));
+ bld.MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1)));
} else {
- emit(MOV(half(int_sample_y, 0),
- fs_reg(suboffset(sample_pos_reg, 1))));
- emit(MOV(half(int_sample_y, 1), fs_reg(suboffset(sample_pos_reg, 17))))
+ bld.MOV(half(int_sample_y, 0),
+ fs_reg(suboffset(sample_pos_reg, 1)));
+ bld.MOV(half(int_sample_y, 1), fs_reg(suboffset(sample_pos_reg, 17)))
->force_sechalf = true;
}
/* Compute gl_SamplePosition.y */
@@ -1496,7 +988,7 @@ fs_visitor::emit_sampleid_setup(ir_variable *ir)
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
assert(brw->gen >= 6);
- this->current_annotation = "compute sample id";
+ bld.set_annotation("compute sample id");
fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
if (key->compute_sample_id) {
@@ -1524,130 +1016,30 @@ fs_visitor::emit_sampleid_setup(ir_variable *ir)
* subspan 1, and finally sample 1 of subspan 1.
*/
fs_inst *inst;
- inst = emit(BRW_OPCODE_AND, t1,
+ inst = bld.emit(BRW_OPCODE_AND, t1,
fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
fs_reg(0xc0));
inst->force_writemask_all = true;
- inst = emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
+ inst = bld.emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
inst->force_writemask_all = true;
/* This works for both SIMD8 and SIMD16 */
- inst = emit(MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210)));
+ inst = bld.MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210));
inst->force_writemask_all = true;
/* This special instruction takes care of setting vstride=1,
* width=4, hstride=0 of t2 during an ADD instruction.
*/
- emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2);
+ bld.emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2);
} else {
/* As per GL_ARB_sample_shading specification:
* "When rendering to a non-multisample buffer, or if multisample
* rasterization is disabled, gl_SampleID will always be zero."
*/
- emit(BRW_OPCODE_MOV, *reg, fs_reg(0));
+ bld.emit(BRW_OPCODE_MOV, *reg, fs_reg(0));
}
return reg;
}
-fs_reg
-fs_visitor::fix_math_operand(fs_reg src)
-{
- /* Can't do hstride == 0 args on gen6 math, so expand it out. We
- * might be able to do better by doing execsize = 1 math and then
- * expanding that result out, but we would need to be careful with
- * masking.
- *
- * The hardware ignores source modifiers (negate and abs) on math
- * instructions, so we also move to a temp to set those up.
- */
- if (brw->gen == 6 && src.file != UNIFORM && src.file != IMM &&
- !src.abs && !src.negate)
- return src;
-
- /* Gen7 relaxes most of the above restrictions, but still can't use IMM
- * operands to math
- */
- if (brw->gen >= 7 && src.file != IMM)
- return src;
-
- fs_reg expanded = fs_reg(this, glsl_type::float_type);
- expanded.type = src.type;
- emit(BRW_OPCODE_MOV, expanded, src);
- return expanded;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
-{
- switch (opcode) {
- case SHADER_OPCODE_RCP:
- case SHADER_OPCODE_RSQ:
- case SHADER_OPCODE_SQRT:
- case SHADER_OPCODE_EXP2:
- case SHADER_OPCODE_LOG2:
- case SHADER_OPCODE_SIN:
- case SHADER_OPCODE_COS:
- break;
- default:
- unreachable("not reached: bad math opcode");
- }
-
- /* Can't do hstride == 0 args to gen6 math, so expand it out. We
- * might be able to do better by doing execsize = 1 math and then
- * expanding that result out, but we would need to be careful with
- * masking.
- *
- * Gen 6 hardware ignores source modifiers (negate and abs) on math
- * instructions, so we also move to a temp to set those up.
- */
- if (brw->gen == 6 || brw->gen == 7)
- src = fix_math_operand(src);
-
- fs_inst *inst = emit(opcode, dst, src);
-
- if (brw->gen < 6) {
- inst->base_mrf = 2;
- inst->mlen = dispatch_width / 8;
- }
-
- return inst;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
-{
- int base_mrf = 2;
- fs_inst *inst;
-
- if (brw->gen >= 8) {
- inst = emit(opcode, dst, src0, src1);
- } else if (brw->gen >= 6) {
- src0 = fix_math_operand(src0);
- src1 = fix_math_operand(src1);
-
- inst = emit(opcode, dst, src0, src1);
- } else {
- /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
- * "Message Payload":
- *
- * "Operand0[7]. For the INT DIV functions, this operand is the
- * denominator."
- * ...
- * "Operand1[7]. For the INT DIV functions, this operand is the
- * numerator."
- */
- bool is_int_div = opcode != SHADER_OPCODE_POW;
- fs_reg &op0 = is_int_div ? src1 : src0;
- fs_reg &op1 = is_int_div ? src0 : src1;
-
- emit(MOV(fs_reg(MRF, base_mrf + 1, op1.type, dispatch_width), op1));
- inst = emit(opcode, dst, op0, reg_null_f);
-
- inst->base_mrf = base_mrf;
- inst->mlen = 2 * dispatch_width / 8;
- }
- return inst;
-}
-
void
fs_visitor::assign_curb_setup()
{
@@ -2069,9 +1461,9 @@ fs_visitor::move_uniform_array_access_to_pull_constants()
if (pull_constant_loc[uniform] == -1) {
const gl_constant_value **values = &stage_prog_data->param[uniform];
- assert(param_size[uniform]);
+ assert(uniform_size[uniform]);
- for (int j = 0; j < param_size[uniform]; j++) {
+ for (int j = 0; j < uniform_size[uniform]; j++) {
pull_constant_loc[uniform + j] = stage_prog_data->nr_pull_params;
stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] =
@@ -2187,34 +1579,23 @@ fs_visitor::demote_pull_constants()
continue;
/* Set up the annotation tracking for new generated instructions. */
- base_ir = inst->ir;
- current_annotation = inst->annotation;
+ bld.set_base_ir(inst->ir);
+ bld.set_annotation(inst->annotation);
+ brw::fs_builder ibld = bld.at(block, inst);
fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
- fs_reg dst = fs_reg(this, glsl_type::float_type);
+ fs_reg dst = ibld.scalar_reg(BRW_REGISTER_TYPE_F);
/* Generate a pull load into dst. */
- if (inst->src[i].reladdr) {
- exec_list list = VARYING_PULL_CONSTANT_LOAD(dst,
- surf_index,
- *inst->src[i].reladdr,
- pull_index);
- inst->insert_before(block, &list);
- inst->src[i].reladdr = NULL;
- } else {
- fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
- fs_inst *pull =
- new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
- dst, surf_index, offset);
- inst->insert_before(block, pull);
- inst->src[i].set_smear(pull_index & 3);
- }
+ emit_pull_constant_load(ibld, dst, surf_index, pull_index * 4,
+ inst->src[i].reladdr, 1);
/* Rewrite the instruction to use the temporary VGRF. */
inst->src[i].file = GRF;
inst->src[i].reg = dst.reg;
inst->src[i].reg_offset = 0;
inst->src[i].width = dispatch_width;
+ inst->src[i].reladdr = NULL;
}
}
invalidate_live_intervals();
@@ -2573,13 +1954,13 @@ fs_visitor::emit_repclear_shader()
int base_mrf = 1;
int color_mrf = base_mrf + 2;
- fs_inst *mov = emit(MOV(vec4(brw_message_reg(color_mrf)),
- fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)));
+ fs_inst *mov = bld.MOV(vec4(brw_message_reg(color_mrf)),
+ fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
mov->force_writemask_all = true;
- fs_inst *write;
+ fs_inst *write = NULL;
if (key->nr_color_regions == 1) {
- write = emit(FS_OPCODE_REP_FB_WRITE);
+ write = bld.emit(FS_OPCODE_REP_FB_WRITE);
write->saturate = key->clamp_fragment_color;
write->base_mrf = color_mrf;
write->target = 0;
@@ -2587,7 +1968,7 @@ fs_visitor::emit_repclear_shader()
write->mlen = 1;
} else {
for (int i = 0; i < key->nr_color_regions; ++i) {
- write = emit(FS_OPCODE_REP_FB_WRITE);
+ write = bld.emit(FS_OPCODE_REP_FB_WRITE);
write->saturate = key->clamp_fragment_color;
write->base_mrf = base_mrf;
write->target = i;
@@ -2597,6 +1978,7 @@ fs_visitor::emit_repclear_shader()
}
write->eot = true;
+ bld = bld.at(NULL, NULL);
calculate_cfg();
assign_constant_locations();
@@ -2983,6 +2365,7 @@ fs_visitor::lower_load_payload()
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
assert(inst->dst.file == MRF || inst->dst.file == GRF);
+ brw::fs_builder ibld = bld.at(block, inst);
fs_reg dst = inst->dst;
for (int i = 0; i < inst->sources; i++) {
@@ -3001,13 +2384,11 @@ fs_visitor::lower_load_payload()
compr4_dst.width = 16;
fs_reg compr4_src = inst->src[i];
compr4_src.width = 16;
- fs_inst *mov = MOV(compr4_dst, compr4_src);
- mov->force_writemask_all = true;
- inst->insert_before(block, mov);
+ brw::exec_all(ibld.MOV(compr4_dst, compr4_src));
/* Mark i+4 as BAD_FILE so we don't emit a MOV for it */
inst->src[i + 4].file = BAD_FILE;
} else {
- fs_inst *mov = MOV(dst, inst->src[i]);
+ fs_inst *mov = ibld.MOV(dst, inst->src[i]);
if (inst->src[i].file == GRF) {
int src_reg = vgrf_to_reg[inst->src[i].reg] +
inst->src[i].reg_offset;
@@ -3029,7 +2410,6 @@ fs_visitor::lower_load_payload()
metadata[dst_reg + 1].force_sechalf = true;
}
}
- inst->insert_before(block, mov);
}
dst = offset(dst, 1);
@@ -3267,34 +2647,6 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, "\n");
}
-/**
- * Possibly returns an instruction that set up @param reg.
- *
- * Sometimes we want to take the result of some expression/variable
- * dereference tree and rewrite the instruction generating the result
- * of the tree. When processing the tree, we know that the
- * instructions generated are all writing temporaries that are dead
- * outside of this tree. So, if we have some instructions that write
- * a temporary, we're free to point that temp write somewhere else.
- *
- * Note that this doesn't guarantee that the instruction generated
- * only reg -- it might be the size=4 destination of a texture instruction.
- */
-fs_inst *
-fs_visitor::get_instruction_generating_reg(fs_inst *start,
- fs_inst *end,
- const fs_reg &reg)
-{
- if (end == start ||
- end->is_partial_write() ||
- reg.reladdr ||
- !reg.equals(end->dst)) {
- return NULL;
- } else {
- return end;
- }
-}
-
void
fs_visitor::setup_payload_gen6()
{
@@ -3480,7 +2832,7 @@ fs_visitor::run()
(stage == MESA_SHADER_FRAGMENT) &&
((brw_wm_prog_key*) this->key)->alpha_test_func;
if (uses_kill || alpha_test_func) {
- fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+ fs_inst *discard_init = bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
discard_init->flag_subreg = 1;
}
@@ -3489,24 +2841,25 @@ fs_visitor::run()
*/
if (shader) {
foreach_in_list(ir_instruction, ir, shader->base.ir) {
- base_ir = ir;
+ bld.set_base_ir(ir);
this->result = reg_undef;
ir->accept(this);
}
} else {
emit_fragment_program_code();
}
- base_ir = NULL;
+ bld.set_base_ir(NULL);
if (failed)
return false;
- emit(FS_OPCODE_PLACEHOLDER_HALT);
+ bld.emit(FS_OPCODE_PLACEHOLDER_HALT);
if (alpha_test_func)
emit_alpha_test();
emit_fb_writes();
+ bld = bld.at(NULL, NULL);
calculate_cfg();
split_virtual_grfs();
@@ -3526,7 +2879,7 @@ fs_visitor::run()
snprintf(filename, 64, "fs%d-%04d-%02d-%02d-" #pass, \
dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
\
- backend_visitor::dump_instructions(filename); \
+ brw::base_visitor::dump_instructions(filename); \
} \
\
progress = progress || this_progress; \
@@ -3537,7 +2890,7 @@ fs_visitor::run()
snprintf(filename, 64, "fs%d-%04d-00-start",
dispatch_width, shader_prog ? shader_prog->Name : 0);
- backend_visitor::dump_instructions(filename);
+ brw::base_visitor::dump_instructions(filename);
}
bool progress;
@@ -3622,7 +2975,6 @@ fs_visitor::run()
}
}
}
- assert(force_uncompressed_stack == 0);
/* This must come after all optimization and register allocation, since
* it inserts dead code that happens to have side effects, and it does
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index f38db3b8abb..3982838dd51 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -28,7 +28,7 @@
#pragma once
#include "brw_shader.h"
-#include "brw_ir_fs.h"
+#include "brw_ir_visitor.h"
extern "C" {
@@ -42,7 +42,6 @@ extern "C" {
#include "program/prog_optimize.h"
#include "util/register_allocate.h"
#include "program/sampler.h"
-#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
@@ -81,13 +80,9 @@ public:
*
* Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
*/
-class fs_visitor : public backend_visitor
+class fs_visitor : public brw::backend_visitor<fs_visitor, brw::fs_builder>
{
public:
- const fs_reg reg_null_f;
- const fs_reg reg_null_d;
- const fs_reg reg_null_ud;
-
fs_visitor(struct brw_context *brw,
void *mem_ctx,
const struct brw_wm_prog_key *key,
@@ -95,98 +90,23 @@ public:
struct gl_shader_program *shader_prog,
struct gl_fragment_program *fp,
unsigned dispatch_width);
- ~fs_visitor();
void init();
- fs_reg *variable_storage(ir_variable *var);
void import_uniforms(fs_visitor *v);
void visit(ir_variable *ir);
- void visit(ir_assignment *ir);
- void visit(ir_dereference_variable *ir);
- void visit(ir_dereference_record *ir);
- void visit(ir_dereference_array *ir);
- void visit(ir_expression *ir);
- void visit(ir_texture *ir);
- void visit(ir_if *ir);
- void visit(ir_constant *ir);
- void visit(ir_swizzle *ir);
- void visit(ir_return *ir);
- void visit(ir_loop *ir);
- void visit(ir_loop_jump *ir);
void visit(ir_discard *ir);
- void visit(ir_call *ir);
- void visit(ir_function *ir);
- void visit(ir_function_signature *ir);
void visit(ir_emit_vertex *);
void visit(ir_end_primitive *);
- uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
- void swizzle_result(ir_texture *ir, fs_reg orig_val, uint32_t sampler);
-
- fs_inst *emit(fs_inst *inst);
- void emit(exec_list list);
-
- fs_inst *emit(enum opcode opcode);
- fs_inst *emit(enum opcode opcode, const fs_reg &dst);
- fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0);
- fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1);
- fs_inst *emit(enum opcode opcode, const fs_reg &dst,
- const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
- fs_inst *emit(enum opcode opcode, const fs_reg &dst,
- fs_reg src[], int sources);
-
- fs_inst *MOV(const fs_reg &dst, const fs_reg &src);
- fs_inst *NOT(const fs_reg &dst, const fs_reg &src);
- fs_inst *RNDD(const fs_reg &dst, const fs_reg &src);
- fs_inst *RNDE(const fs_reg &dst, const fs_reg &src);
- fs_inst *RNDZ(const fs_reg &dst, const fs_reg &src);
- fs_inst *FRC(const fs_reg &dst, const fs_reg &src);
- fs_inst *ADD(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *MUL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *MACH(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *MAC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *SHL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *SHR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *ASR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *AND(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *OR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *XOR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *IF(enum brw_predicate predicate);
- fs_inst *IF(const fs_reg &src0, const fs_reg &src1,
- enum brw_conditional_mod condition);
- fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1,
- enum brw_conditional_mod condition);
- fs_inst *LRP(const fs_reg &dst, const fs_reg &a, const fs_reg &y,
- const fs_reg &x);
- fs_inst *DEP_RESOLVE_MOV(int grf);
- fs_inst *BFREV(const fs_reg &dst, const fs_reg &value);
- fs_inst *BFE(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset,
- const fs_reg &value);
- fs_inst *BFI1(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset);
- fs_inst *BFI2(const fs_reg &dst, const fs_reg &bfi1_dst,
- const fs_reg &insert, const fs_reg &base);
- fs_inst *FBH(const fs_reg &dst, const fs_reg &value);
- fs_inst *FBL(const fs_reg &dst, const fs_reg &value);
- fs_inst *CBIT(const fs_reg &dst, const fs_reg &value);
- fs_inst *MAD(const fs_reg &dst, const fs_reg &c, const fs_reg &b,
- const fs_reg &a);
- fs_inst *ADDC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *SUBB(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *SEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-
- int type_size(const struct glsl_type *type);
- fs_inst *get_instruction_generating_reg(fs_inst *start,
- fs_inst *end,
- const fs_reg &reg);
-
- fs_inst *LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources);
-
- exec_list VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
- const fs_reg &surf_index,
- const fs_reg &varying_offset,
- uint32_t const_offset);
+ dst_reg
+ temporary_reg(const glsl_type *type)
+ {
+ return bld.scalar_reg(brw_type_for_base_type(type),
+ type_size(type));
+ }
+
+ instruction *DEP_RESOLVE_MOV(int grf);
bool run();
void assign_binding_table_offsets();
@@ -233,15 +153,10 @@ public:
fs_inst *inst);
void insert_gen4_post_send_dependency_workarounds(bblock_t *block,
fs_inst *inst);
- void vfail(const char *msg, va_list args);
- void fail(const char *msg, ...);
void no16(const char *msg, ...);
void lower_uniform_pull_constant_loads();
bool lower_load_payload();
- void push_force_uncompressed();
- void pop_force_uncompressed();
-
void emit_dummy_fs();
void emit_repclear_shader();
fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
@@ -255,34 +170,26 @@ public:
void emit_interpolation_setup_gen4();
void emit_interpolation_setup_gen6();
void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
- fs_reg rescale_texcoord(ir_texture *ir, fs_reg coordinate,
- bool is_rect, uint32_t sampler, int texunit);
fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
- fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
- uint32_t sampler);
- fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
- fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
- fs_reg sample_index, uint32_t sampler);
- fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
- fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
- fs_reg sample_index, fs_reg mcs, fs_reg sampler);
- fs_reg emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, fs_reg sampler);
- void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
- fs_reg fix_math_operand(fs_reg src);
- fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
- fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
- void emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y,
- const fs_reg &a);
- void emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
- const fs_reg &src0, const fs_reg &src1);
- bool try_emit_saturate(ir_expression *ir);
- bool try_emit_mad(ir_expression *ir);
+ const fs_reg &shadow_c, fs_reg lod, fs_reg lod2,
+ const fs_reg &sampler);
+ fs_inst *emit_texture_gen5(ir_texture *ir, const fs_reg &dst, fs_reg coordinate,
+ const fs_reg &shadow_c, fs_reg lod, fs_reg lod2,
+ const fs_reg &sample_index, const fs_reg &sampler);
+ fs_inst *emit_texture_gen7(ir_texture *ir, const fs_reg &dst, fs_reg coordinate,
+ const fs_reg &shadow_c, fs_reg lod, fs_reg lod2,
+ fs_reg offset_val, const fs_reg &sample_index,
+ const fs_reg &mcs, const fs_reg &sampler);
+ fs_inst *emit_texture(ir_texture *ir, const fs_reg &dst,
+ const fs_reg &coordinate, const fs_reg &shadow_c,
+ const fs_reg &lod, const fs_reg &lod2,
+ const fs_reg &offset_val, const fs_reg &sample_index,
+ const fs_reg &mcs, const fs_reg &sampler);
+ fs_reg emit_untyped_surface_header();
void try_replace_with_sel();
bool opt_peephole_sel();
bool opt_peephole_predicated_break();
bool opt_saturate_propagation();
- void emit_bool_to_cond_code(ir_rvalue *condition);
- void emit_if_gen6(ir_if *ir);
void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
uint32_t spill_offset, int count);
void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,
@@ -317,59 +224,45 @@ public:
fs_reg src0_alpha, unsigned components);
void emit_fb_writes();
- void emit_shader_time_begin();
- void emit_shader_time_end();
- void emit_shader_time_write(enum shader_time_shader_type type,
- fs_reg value);
-
- void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
- fs_reg dst, fs_reg offset, fs_reg src0,
- fs_reg src1);
+ void emit_interpolate_expression(ir_expression *ir);
- void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
- fs_reg offset);
+ void emit_pull_constant_load(brw::fs_builder &bld,
+ const dst_reg &dst,
+ const src_reg &surf_index,
+ uint32_t offset,
+ const src_reg *reladdr,
+ unsigned num_components);
- void emit_interpolate_expression(ir_expression *ir);
+ struct brw_reg interp_reg(int location, int channel);
- bool try_rewrite_rhs_to_dst(ir_assignment *ir,
- fs_reg dst,
- fs_reg src,
- fs_inst *pre_rhs_inst,
- fs_inst *last_rhs_inst);
- void emit_assignment_writes(fs_reg &l, fs_reg &r,
- const glsl_type *type, bool predicated);
- void resolve_ud_negate(fs_reg *reg);
- void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg);
+ void emit_pack_half_2x16(dst_reg dst, src_reg src0)
+ {
+ unreachable("not reached");
+ }
- fs_reg get_timestamp();
+ void emit_unpack_half_2x16(dst_reg dst, src_reg src0)
+ {
+ unreachable("not reached");
+ }
- struct brw_reg interp_reg(int location, int channel);
- void setup_uniform_values(ir_variable *ir);
- void setup_builtin_uniform_values(ir_variable *ir);
- int implied_mrf_writes(fs_inst *inst);
+ const struct brw_sampler_prog_key_data *
+ sampler_prog_key() const {
+ return &((const brw_wm_prog_key *)key)->tex;
+ }
virtual void dump_instructions();
virtual void dump_instructions(const char *name);
void dump_instruction(backend_instruction *inst);
void dump_instruction(backend_instruction *inst, FILE *file);
- void visit_atomic_counter_intrinsic(ir_call *ir);
-
const void *const key;
struct brw_stage_prog_data *prog_data;
unsigned int sanity_param_count;
- int *param_size;
-
- int *virtual_grf_start;
- int *virtual_grf_end;
brw::fs_live_variables *live_intervals;
int *regs_live_at_ip;
- /** Number of uniform variable components visited. */
- unsigned uniforms;
-
/** Byte-offset for the next available spot in the scratch space buffer. */
unsigned last_scratch;
@@ -385,33 +278,19 @@ public:
*/
int *push_constant_loc;
- struct hash_table *variable_ht;
fs_reg frag_depth;
fs_reg sample_mask;
fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
unsigned output_components[BRW_MAX_DRAW_BUFFERS];
fs_reg dual_src_output;
bool do_dual_src;
- int first_non_payload_grf;
- /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
- unsigned max_grf;
fs_reg *fp_temp_regs;
fs_reg *fp_input_regs;
- /** @{ debug annotation info */
- const char *current_annotation;
- const void *base_ir;
- /** @} */
-
- bool failed;
- char *fail_msg;
bool simd16_unsupported;
char *no16_msg;
- /* Result of last visit() method. */
- fs_reg result;
-
/** Register numbers for thread payload fields. */
struct {
uint8_t source_depth_reg;
@@ -435,14 +314,11 @@ public:
fs_reg pixel_w;
fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
- fs_reg shader_start_time;
unsigned grf_used;
bool spilled_any_registers;
const unsigned dispatch_width; /**< 8 or 16 */
-
- int force_uncompressed_stack;
};
/**
@@ -486,7 +362,8 @@ private:
struct brw_reg src1);
void generate_math_gen4(fs_inst *inst,
struct brw_reg dst,
- struct brw_reg src);
+ struct brw_reg src0,
+ struct brw_reg src1);
void generate_math_g45(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
@@ -540,11 +417,6 @@ private:
struct brw_reg dst,
struct brw_reg src);
- void generate_shader_time_add(fs_inst *inst,
- struct brw_reg payload,
- struct brw_reg offset,
- struct brw_reg value);
-
void generate_untyped_atomic(fs_inst *inst,
struct brw_reg dst,
struct brw_reg payload,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index e1989cb5e4c..99a412a85b7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -302,7 +302,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
(entry->dst.reg_offset + entry->regs_written) * 32)
return false;
- /* See resolve_ud_negate() and comment in brw_fs_emit.cpp. */
+ /* See fix_condmod_negate() and comment in brw_fs_emit.cpp. */
if (inst->conditional_mod &&
inst->src[arg].type == BRW_REGISTER_TYPE_UD &&
entry->src.negate)
@@ -381,7 +381,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
break;
case GRF:
{
- assert(entry->src.width % inst->src[arg].width == 0);
+ assert(entry->src.width % inst->src[arg].width == 0 ||
+ entry->src.width == 1);
/* In this case, we'll just leave the width alone. The source
* register could have different widths depending on how it is
* being used. For instance, if only half of the register was
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 87f67564657..0aeb67c0900 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -211,18 +211,18 @@ fs_visitor::opt_cse_local(bblock_t *block)
entry->tmp = tmp;
entry->generator->dst = tmp;
- fs_inst *copy;
+ brw::fs_builder ibld = bld.at(block,
+ (fs_inst *)entry->generator->next);
if (written > dst_width) {
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written / dst_width);
for (int i = 0; i < written / dst_width; i++)
sources[i] = offset(tmp, i);
- copy = LOAD_PAYLOAD(orig_dst, sources, written / dst_width);
+ ibld.LOAD_PAYLOAD(orig_dst, sources, written / dst_width);
} else {
- copy = MOV(orig_dst, tmp);
- copy->force_writemask_all =
+ ibld.MOV(orig_dst, tmp)
+ ->force_writemask_all =
entry->generator->force_writemask_all;
}
- entry->generator->insert_after(block, copy);
}
/* dest <- temp */
@@ -234,17 +234,16 @@ fs_visitor::opt_cse_local(bblock_t *block)
assert(inst->dst.type == entry->tmp.type);
fs_reg dst = inst->dst;
fs_reg tmp = entry->tmp;
- fs_inst *copy;
+ brw::fs_builder ibld = bld.at(block, inst);
if (written > dst_width) {
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written / dst_width);
for (int i = 0; i < written / dst_width; i++)
sources[i] = offset(tmp, i);
- copy = LOAD_PAYLOAD(dst, sources, written / dst_width);
+ ibld.LOAD_PAYLOAD(dst, sources, written / dst_width);
} else {
- copy = MOV(dst, tmp);
- copy->force_writemask_all = inst->force_writemask_all;
+ ibld.MOV(dst, tmp)
+ ->force_writemask_all = inst->force_writemask_all;
}
- inst->insert_before(block, copy);
}
/* Set our iterator so that next time through the loop inst->next
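
The CSE rewrite above relies on the new builder object: bld.at(block, inst)
returns a builder whose emitted instructions land at a fixed point in the
block, so the pass no longer constructs instructions and calls
insert_before()/insert_after() by hand. A rough sketch of the pattern
(field and type names are illustrative; the real brw::fs_builder carries
more state, such as the current annotation and execution width):

   struct inst_builder {
      void *mem_ctx;    /* ralloc context for new instructions */
      bblock_t *block;  /* block receiving emitted instructions */
      fs_inst *cursor;  /* new instructions land before this one */

      inst_builder at(bblock_t *blk, fs_inst *inst) const {
         inst_builder ibld = *this;
         ibld.block = blk;
         ibld.cursor = inst;
         return ibld;
      }

      fs_inst *MOV(const fs_reg &dst, const fs_reg &src) const {
         fs_inst *mov = new(mem_ctx) fs_inst(BRW_OPCODE_MOV, dst, src);
         cursor->insert_before(block, mov);
         return mov;  /* caller may still set flags on the result */
      }
   };

Returning the emitted instruction is what lets opt_cse_local() write
ibld.MOV(dst, tmp)->force_writemask_all = ... as a single expression.
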
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
index 9f0c0c7ac48..114bf67494d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
@@ -37,7 +37,7 @@ fs_visitor::emit_fp_alu1(enum opcode opcode,
{
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i))
- emit(opcode, offset(dst, i), offset(src, i));
+ bld.emit(opcode, offset(dst, i), offset(src, i));
}
}
@@ -48,7 +48,7 @@ fs_visitor::emit_fp_alu2(enum opcode opcode,
{
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i))
- emit(opcode, offset(dst, i),
+ bld.emit(opcode, offset(dst, i),
offset(src0, i), offset(src1, i));
}
}
@@ -65,7 +65,7 @@ fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i)) {
- emit_minmax(conditionalmod, offset(dst, i),
+ bld.emit_minmax(conditionalmod, offset(dst, i),
offset(src0, i), offset(src1, i));
}
}
@@ -81,10 +81,10 @@ fs_visitor::emit_fp_sop(enum brw_conditional_mod conditional_mod,
if (fpi->DstReg.WriteMask & (1 << i)) {
fs_inst *inst;
- emit(CMP(reg_null_d, offset(src0, i), offset(src1, i),
- conditional_mod));
+ bld.CMP(bld.reg_null_d(), offset(src0, i), offset(src1, i),
+ conditional_mod);
- inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f));
+ inst = bld.emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f));
inst->predicate = BRW_PREDICATE_NORMAL;
}
}
@@ -96,7 +96,7 @@ fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
{
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i))
- emit(MOV(offset(dst, i), src));
+ bld.MOV(offset(dst, i), src);
}
}
@@ -106,7 +106,7 @@ fs_visitor::emit_fp_scalar_math(enum opcode opcode,
fs_reg dst, fs_reg src)
{
fs_reg temp = fs_reg(this, glsl_type::float_type);
- emit_math(opcode, temp, src);
+ bld.emit_math(opcode, temp, src);
emit_fp_scalar_write(fpi, dst, temp);
}
@@ -126,11 +126,11 @@ fs_visitor::emit_fragment_program_code()
* mov.f0 dst 1.0
*/
fs_reg one = fs_reg(this, glsl_type::float_type);
- emit(MOV(one, fs_reg(1.0f)));
+ bld.MOV(one, fs_reg(1.0f));
for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
const struct prog_instruction *fpi = &prog->Instructions[insn];
- base_ir = fpi;
+ bld.set_base_ir(fpi);
//_mesa_print_instruction(fpi);
@@ -161,10 +161,10 @@ fs_visitor::emit_fragment_program_code()
if (fpi->DstReg.WriteMask & (1 << i)) {
fs_inst *inst;
- emit(CMP(reg_null_f, offset(src[0], i), fs_reg(0.0f),
- BRW_CONDITIONAL_L));
+ bld.CMP(bld.reg_null_f(), offset(src[0], i), fs_reg(0.0f),
+ BRW_CONDITIONAL_L);
- inst = emit(BRW_OPCODE_SEL, offset(dst, i),
+ inst = bld.emit(BRW_OPCODE_SEL, offset(dst, i),
offset(src[1], i), offset(src[2], i));
inst->predicate = BRW_PREDICATE_NORMAL;
}
@@ -191,14 +191,14 @@ fs_visitor::emit_fragment_program_code()
default: unreachable("not reached");
}
- emit(MUL(acc, offset(src[0], 0), offset(src[1], 0)));
+ bld.MUL(acc, offset(src[0], 0), offset(src[1], 0));
for (int i = 1; i < count; i++) {
- emit(MUL(mul, offset(src[0], i), offset(src[1], i)));
- emit(ADD(acc, acc, mul));
+ bld.MUL(mul, offset(src[0], i), offset(src[1], i));
+ bld.ADD(acc, acc, mul);
}
if (fpi->Opcode == OPCODE_DPH)
- emit(ADD(acc, acc, offset(src[1], 3)));
+ bld.ADD(acc, acc, offset(src[1], 3));
emit_fp_scalar_write(fpi, dst, acc);
break;
@@ -206,15 +206,15 @@ fs_visitor::emit_fragment_program_code()
case OPCODE_DST:
if (fpi->DstReg.WriteMask & WRITEMASK_X)
- emit(MOV(dst, fs_reg(1.0f)));
+ bld.MOV(dst, fs_reg(1.0f));
if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
- emit(MUL(offset(dst, 1),
- offset(src[0], 1), offset(src[1], 1)));
+ bld.MUL(offset(dst, 1),
+ offset(src[0], 1), offset(src[1], 1));
}
if (fpi->DstReg.WriteMask & WRITEMASK_Z)
- emit(MOV(offset(dst, 2), offset(src[0], 2)));
+ bld.MOV(offset(dst, 2), offset(src[0], 2));
if (fpi->DstReg.WriteMask & WRITEMASK_W)
- emit(MOV(offset(dst, 3), offset(src[1], 3)));
+ bld.MOV(offset(dst, 3), offset(src[1], 3));
break;
case OPCODE_EX2:
@@ -248,8 +248,8 @@ fs_visitor::emit_fragment_program_code()
* undiscarded pixels, and updates just those pixels to be
* turned off.
*/
- fs_inst *cmp = emit(CMP(reg_null_f, offset(src[0], i),
- fs_reg(0.0f), BRW_CONDITIONAL_GE));
+ fs_inst *cmp = bld.CMP(bld.reg_null_f(), offset(src[0], i),
+ fs_reg(0.0f), BRW_CONDITIONAL_GE);
cmp->predicate = BRW_PREDICATE_NORMAL;
cmp->flag_subreg = 1;
}
@@ -277,30 +277,30 @@ fs_visitor::emit_fragment_program_code()
* brw_wm_emit.c either.
*/
if (fpi->DstReg.WriteMask & WRITEMASK_X)
- emit(MOV(offset(dst, 0), fs_reg(1.0f)));
+ bld.MOV(offset(dst, 0), fs_reg(1.0f));
if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
fs_inst *inst;
- emit(CMP(reg_null_f, offset(src[0], 0), fs_reg(0.0f),
- BRW_CONDITIONAL_LE));
+ bld.CMP(bld.reg_null_f(), offset(src[0], 0), fs_reg(0.0f),
+ BRW_CONDITIONAL_LE);
if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
- emit(MOV(offset(dst, 1), offset(src[0], 0)));
- inst = emit(MOV(offset(dst, 1), fs_reg(0.0f)));
+ bld.MOV(offset(dst, 1), offset(src[0], 0));
+ inst = bld.MOV(offset(dst, 1), fs_reg(0.0f));
inst->predicate = BRW_PREDICATE_NORMAL;
}
if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
- emit_math(SHADER_OPCODE_POW, offset(dst, 2),
- offset(src[0], 1), offset(src[0], 3));
+ bld.emit_math(SHADER_OPCODE_POW, offset(dst, 2),
+ offset(src[0], 1), offset(src[0], 3));
- inst = emit(MOV(offset(dst, 2), fs_reg(0.0f)));
+ inst = bld.MOV(offset(dst, 2), fs_reg(0.0f));
inst->predicate = BRW_PREDICATE_NORMAL;
}
}
if (fpi->DstReg.WriteMask & WRITEMASK_W)
- emit(MOV(offset(dst, 3), fs_reg(1.0f)));
+ bld.MOV(offset(dst, 3), fs_reg(1.0f));
break;
@@ -310,7 +310,7 @@ fs_visitor::emit_fragment_program_code()
fs_reg a = offset(src[0], i);
fs_reg y = offset(src[1], i);
fs_reg x = offset(src[2], i);
- emit_lrp(offset(dst, i), x, y, a);
+ bld.LRP(offset(dst, i), x, y, a);
}
}
break;
@@ -319,8 +319,8 @@ fs_visitor::emit_fragment_program_code()
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i)) {
fs_reg temp = fs_reg(this, glsl_type::float_type);
- emit(MUL(temp, offset(src[0], i), offset(src[1], i)));
- emit(ADD(offset(dst, i), temp, offset(src[2], i)));
+ bld.MUL(temp, offset(src[0], i), offset(src[1], i));
+ bld.ADD(offset(dst, i), temp, offset(src[2], i));
}
}
break;
@@ -343,7 +343,7 @@ fs_visitor::emit_fragment_program_code()
case OPCODE_POW: {
fs_reg temp = fs_reg(this, glsl_type::float_type);
- emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
+ bld.emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
emit_fp_scalar_write(fpi, dst, temp);
break;
}
@@ -358,13 +358,13 @@ fs_visitor::emit_fragment_program_code()
case OPCODE_SCS:
if (fpi->DstReg.WriteMask & WRITEMASK_X) {
- emit_math(SHADER_OPCODE_COS, offset(dst, 0),
- offset(src[0], 0));
+ bld.emit_math(SHADER_OPCODE_COS, offset(dst, 0),
+ offset(src[0], 0));
}
if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
- emit_math(SHADER_OPCODE_SIN, offset(dst, 1),
- offset(src[0], 1));
+ bld.emit_math(SHADER_OPCODE_SIN, offset(dst, 1),
+ offset(src[0], 1));
}
break;
@@ -414,10 +414,10 @@ fs_visitor::emit_fragment_program_code()
coordinate = fs_reg(this, glsl_type::vec3_type);
fs_reg invproj = fs_reg(this, glsl_type::float_type);
- emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3));
+ bld.emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3));
for (int i = 0; i < 3; i++) {
- emit(MUL(offset(coordinate, i),
- offset(src[0], i), invproj));
+ bld.MUL(offset(coordinate, i),
+ offset(src[0], i), invproj);
}
break;
}
@@ -457,14 +457,14 @@ fs_visitor::emit_fragment_program_code()
fs_reg abscoord = coordinate;
abscoord.negate = false;
abscoord.abs = true;
- emit_minmax(BRW_CONDITIONAL_GE, temp,
- offset(abscoord, 0), offset(abscoord, 1));
- emit_minmax(BRW_CONDITIONAL_GE, temp,
- temp, offset(abscoord, 2));
- emit_math(SHADER_OPCODE_RCP, temp, temp);
+ bld.emit_minmax(BRW_CONDITIONAL_GE, temp,
+ offset(abscoord, 0), offset(abscoord, 1));
+ bld.emit_minmax(BRW_CONDITIONAL_GE, temp,
+ temp, offset(abscoord, 2));
+ bld.emit_math(SHADER_OPCODE_RCP, temp, temp);
for (int i = 0; i < 3; i++) {
- emit(MUL(offset(cubecoord, i),
- offset(coordinate, i), temp));
+ bld.MUL(offset(cubecoord, i),
+ offset(coordinate, i), temp);
}
coordinate = cubecoord;
@@ -485,15 +485,9 @@ fs_visitor::emit_fragment_program_code()
fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
fpi->TexSrcUnit, fpi->TexSrcUnit);
- fs_inst *inst;
- if (brw->gen >= 7) {
- inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index, fs_reg(0u), fs_reg(fpi->TexSrcUnit));
- } else if (brw->gen >= 5) {
- inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index, fpi->TexSrcUnit);
- } else {
- inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy, fpi->TexSrcUnit);
- }
-
+ fs_inst *inst = emit_texture(ir, dst, coordinate, shadow_c, lod, dpdy,
+ fs_reg(), sample_index, fs_reg(0u),
+ fs_reg(fpi->TexSrcUnit));
inst->shadow_compare = fpi->TexShadow;
/* Reuse the GLSL swizzle_result() handler. */
@@ -519,10 +513,10 @@ fs_visitor::emit_fragment_program_code()
fs_reg temp = fs_reg(this, glsl_type::float_type);
fs_reg neg_src1_1 = offset(src[1], i1);
neg_src1_1.negate = !neg_src1_1.negate;
- emit(MUL(temp, offset(src[0], i2), neg_src1_1));
- emit(MUL(offset(dst, i),
- offset(src[0], i1), offset(src[1], i2)));
- emit(ADD(offset(dst, i), offset(dst, i), temp));
+ bld.MUL(temp, offset(src[0], i2), neg_src1_1);
+ bld.MUL(offset(dst, i),
+ offset(src[0], i1), offset(src[1], i2));
+ bld.ADD(offset(dst, i), offset(dst, i), temp);
}
}
break;
@@ -543,8 +537,8 @@ fs_visitor::emit_fragment_program_code()
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i)) {
- fs_inst *inst = emit(MOV(offset(real_dst, i),
- offset(dst, i)));
+ fs_inst *inst = bld.MOV(offset(real_dst, i),
+ offset(dst, i));
inst->saturate = fpi->SaturateMode;
}
}
@@ -556,10 +550,10 @@ fs_visitor::emit_fragment_program_code()
* Fragment depth has this strange convention of being the .z component of
* a vec4. emit_fb_write() wants to see a float value, instead.
*/
- this->current_annotation = "result.depth write";
+ bld.set_annotation("result.depth write");
if (frag_depth.file != BAD_FILE) {
fs_reg temp = fs_reg(this, glsl_type::float_type);
- emit(MOV(temp, offset(frag_depth, 2)));
+ bld.MOV(temp, offset(frag_depth, 2));
frag_depth = temp;
}
}
@@ -595,8 +589,8 @@ fs_visitor::setup_fp_regs()
ir_var_shader_in);
ir->data.location = i;
- this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
- i);
+ bld.set_annotation(ralloc_asprintf(ctx, "interpolate input %d",
+ i));
switch (i) {
case VARYING_SLOT_POS:
@@ -615,15 +609,15 @@ fs_visitor::setup_fp_regs()
fp_input_regs[i] = *emit_general_interpolation(ir);
if (i == VARYING_SLOT_FOGC) {
- emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f)));
- emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f)));
- emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f)));
+ bld.MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f));
+ bld.MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f));
+ bld.MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f));
}
break;
}
- this->current_annotation = NULL;
+ bld.set_annotation(NULL);
}
}
}
@@ -708,8 +702,8 @@ fs_visitor::get_fp_src_reg(const prog_src_register *src)
result = fs_reg(this, glsl_type::vec4_type);
for (int i = 0; i < 4; i++) {
- emit(MOV(offset(result, i),
- fs_reg(plist->ParameterValues[src->Index][i].f)));
+ bld.MOV(offset(result, i),
+ fs_reg(plist->ParameterValues[src->Index][i].f));
}
break;
}
@@ -742,15 +736,15 @@ fs_visitor::get_fp_src_reg(const prog_src_register *src)
*/
int src_swiz = GET_SWZ(src->Swizzle, i);
if (src_swiz == SWIZZLE_ZERO) {
- emit(MOV(offset(result, i), fs_reg(0.0f)));
+ bld.MOV(offset(result, i), fs_reg(0.0f));
} else if (src_swiz == SWIZZLE_ONE) {
- emit(MOV(offset(result, i),
- negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
+ bld.MOV(offset(result, i),
+ negate ? fs_reg(-1.0f) : fs_reg(1.0f));
} else {
fs_reg src = offset(unswizzled, src_swiz);
if (negate)
src.negate = !src.negate;
- emit(MOV(offset(result, i), src));
+ bld.MOV(offset(result, i), src);
}
}
}
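
get_fp_src_reg() expands each destination channel of a swizzled source
independently: SWIZZLE_ZERO and SWIZZLE_ONE channels become immediate
MOVs, everything else is a MOV from the selected source component.
Assuming Mesa's usual 3-bits-per-channel swizzle encoding (GET_SWZ()
extracts channel i), the selection amounts to:

   /* Sketch of the per-channel swizzle decode used above. */
   enum { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W, SWZ_ZERO, SWZ_ONE };

   static inline unsigned
   get_swz(unsigned swizzle, unsigned chan)
   {
      return (swizzle >> (3 * chan)) & 0x7;
   }

   /* E.g. for a source swizzled .xxzw, channel 1 decodes to SWZ_X, so
    * the visitor emits MOV(offset(result, 1), offset(unswizzled, 0));
    * a SWZ_ZERO or SWZ_ONE channel skips the source and MOVs an
    * immediate 0.0f or (negated) 1.0f instead, as in the loop above.
    */
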
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c2010c036c9..ee3eec4a665 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -329,28 +329,51 @@ fs_generator::generate_math_gen6(fs_inst *inst,
void
fs_generator::generate_math_gen4(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src)
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
+ /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
+ * "Message Payload":
+ *
+ * "Operand0[7]. For the INT DIV functions, this operand is the
+ * denominator."
+ * ...
+ * "Operand1[7]. For the INT DIV functions, this operand is the
+ * numerator."
+ */
+ bool is_int_div = (inst->opcode == SHADER_OPCODE_INT_QUOTIENT ||
+ inst->opcode == SHADER_OPCODE_INT_REMAINDER);
+ struct brw_reg &op0 = is_int_div ? src1 : src0;
+ struct brw_reg &op1 = is_int_div ? src0 : src1;
int op = brw_math_function(inst->opcode);
assert(inst->mlen >= 1);
+ if (src1.file != BRW_ARCHITECTURE_REGISTER_FILE) {
+ brw_push_insn_state(p);
+ brw_set_default_saturate(p, false);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
+ brw_pop_insn_state(p);
+ }
+
if (dispatch_width == 8) {
gen4_math(p, dst,
op,
- inst->base_mrf, src,
+ inst->base_mrf, op0,
BRW_MATH_PRECISION_FULL);
+
} else if (dispatch_width == 16) {
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
gen4_math(p, firsthalf(dst),
op,
- inst->base_mrf, firsthalf(src),
+ inst->base_mrf, firsthalf(op0),
BRW_MATH_PRECISION_FULL);
brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
gen4_math(p, sechalf(dst),
op,
- inst->base_mrf + 1, sechalf(src),
+ inst->base_mrf + 1, sechalf(op0),
BRW_MATH_PRECISION_FULL);
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
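
Concretely, the swap above means that for the INT DIV opcodes the
denominator travels as the implicit operand of the math send while the
numerator is MOVed into the second message register; POW keeps the
natural order. A sketch of the mapping (helper name illustrative):

   /* Sketch only: mirrors the operand selection in generate_math_gen4().
    * op0 is the implicit send operand, op1 goes in m(base_mrf + 1).
    */
   static inline void
   pick_gen4_math_operands(enum opcode op,
                           const struct brw_reg &src0,
                           const struct brw_reg &src1,
                           struct brw_reg *op0, struct brw_reg *op1)
   {
      const bool is_int_div = (op == SHADER_OPCODE_INT_QUOTIENT ||
                               op == SHADER_OPCODE_INT_REMAINDER);
      /* Per the PRM quote: for INT DIV, Operand0 is the denominator
       * (src1) and Operand1 the numerator (src0).
       */
      *op0 = is_int_div ? src1 : src0;
      *op1 = is_int_div ? src0 : src1;
   }
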
@@ -362,13 +385,9 @@ fs_generator::generate_math_g45(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src)
{
- if (inst->opcode == SHADER_OPCODE_POW ||
- inst->opcode == SHADER_OPCODE_INT_QUOTIENT ||
- inst->opcode == SHADER_OPCODE_INT_REMAINDER) {
- generate_math_gen4(inst, dst, src);
- return;
- }
-
+ assert(inst->opcode != SHADER_OPCODE_POW &&
+ inst->opcode != SHADER_OPCODE_INT_QUOTIENT &&
+ inst->opcode != SHADER_OPCODE_INT_REMAINDER);
int op = brw_math_function(inst->opcode);
assert(inst->mlen >= 1);
@@ -1442,45 +1461,6 @@ fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
}
void
-fs_generator::generate_shader_time_add(fs_inst *inst,
- struct brw_reg payload,
- struct brw_reg offset,
- struct brw_reg value)
-{
- assert(brw->gen >= 7);
- brw_push_insn_state(p);
- brw_set_default_mask_control(p, true);
-
- assert(payload.file == BRW_GENERAL_REGISTER_FILE);
- struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
- offset.type);
- struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
- value.type);
-
- assert(offset.file == BRW_IMMEDIATE_VALUE);
- if (value.file == BRW_GENERAL_REGISTER_FILE) {
- value.width = BRW_WIDTH_1;
- value.hstride = BRW_HORIZONTAL_STRIDE_0;
- value.vstride = BRW_VERTICAL_STRIDE_0;
- } else {
- assert(value.file == BRW_IMMEDIATE_VALUE);
- }
-
- /* Trying to deal with setup of the params from the IR is crazy in the FS8
- * case, and we don't really care about squeezing every bit of performance
- * out of this path, so we just emit the MOVs from here.
- */
- brw_MOV(p, payload_offset, offset);
- brw_MOV(p, payload_value, value);
- brw_shader_time_add(p, payload,
- prog_data->binding_table.shader_time_start);
- brw_pop_insn_state(p);
-
- brw_mark_surface_used(prog_data,
- prog_data->binding_table.shader_time_start);
-}
-
-void
fs_generator::generate_untyped_atomic(fs_inst *inst, struct brw_reg dst,
struct brw_reg payload,
struct brw_reg atomic_op,
@@ -1805,7 +1785,7 @@ fs_generator::generate_code(const cfg_t *cfg)
} else if (brw->gen == 5 || brw->is_g4x) {
generate_math_g45(inst, dst, src[0]);
} else {
- generate_math_gen4(inst, dst, src[0]);
+ generate_math_gen4(inst, dst, src[0], brw_null_reg());
}
break;
case SHADER_OPCODE_INT_QUOTIENT:
@@ -1817,7 +1797,7 @@ fs_generator::generate_code(const cfg_t *cfg)
} else if (brw->gen >= 6) {
generate_math_gen6(inst, dst, src[0], src[1]);
} else {
- generate_math_gen4(inst, dst, src[0]);
+ generate_math_gen4(inst, dst, src[0], src[1]);
}
break;
case FS_OPCODE_PIXEL_X:
@@ -1905,7 +1885,10 @@ fs_generator::generate_code(const cfg_t *cfg)
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
- generate_shader_time_add(inst, src[0], src[1], src[2]);
+ brw_shader_time_add(p, src[0],
+ prog_data->binding_table.shader_time_start);
+ brw_mark_surface_used(prog_data,
+ prog_data->binding_table.shader_time_start);
break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
index b7a1d7e7722..7e7371f3a8e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
@@ -85,8 +85,8 @@ fs_visitor::opt_peephole_predicated_break()
* instruction to set the flag register.
*/
if (brw->gen == 6 && if_inst->conditional_mod) {
- fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
- if_inst->conditional_mod);
+ fs_inst *cmp_inst = bld.CMP(bld.reg_null_d(), if_inst->src[0], if_inst->src[1],
+ if_inst->conditional_mod);
if_inst->insert_before(if_block, cmp_inst);
jump_inst->predicate = BRW_PREDICATE_NORMAL;
} else {
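
On Sandybridge an IF can carry an embedded comparison, so once the jump
is predicated directly, the flag has to be produced by an explicit CMP,
which is what the hunk above emits. Schematically (a sketch of the
transform, not actual disassembly):

   IF.ne src0, src1            CMP.ne.f0 null, src0, src1
      BREAK             ==>    (+f0) BREAK
   ENDIF

The CMP recreates the flag write the embedded comparison used to
perform, so the BREAK can be predicated on f0 directly.
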
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 833ba15b1b6..b792b03e5e0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -666,7 +666,7 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src,
for (int i = 0; i < count / reg_size; i++) {
fs_inst *spill_inst =
new(mem_ctx) fs_inst(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
- reg_null_f, src);
+ bld.reg_null_f(), src);
src.reg_offset += reg_size;
spill_inst->offset = spill_offset + i * reg_size;
spill_inst->ir = inst->ir;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
index c3bfd00e70d..c3e96a6e31a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
@@ -66,7 +66,8 @@ count_movs_from_if(fs_inst *then_mov[MAX_MOVS], fs_inst *else_mov[MAX_MOVS],
{
int then_movs = 0;
foreach_inst_in_block(fs_inst, inst, then_block) {
- if (then_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV)
+ if (then_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV ||
+ inst->is_partial_write())
break;
then_mov[then_movs] = inst;
@@ -75,7 +76,12 @@ count_movs_from_if(fs_inst *then_mov[MAX_MOVS], fs_inst *else_mov[MAX_MOVS],
int else_movs = 0;
foreach_inst_in_block(fs_inst, inst, else_block) {
- if (else_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV)
+ if (else_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV ||
+ /* Check that the MOVs are the right form. */
+ !then_mov[else_movs] || !then_mov[else_movs]->dst.equals(inst->dst) ||
+ /* Check that source types for mov operations match. */
+ then_mov[else_movs]->src[0].type != inst->src[0].type ||
+ inst->is_partial_write())
break;
else_mov[else_movs] = inst;
@@ -148,13 +154,13 @@ fs_visitor::opt_peephole_sel()
if (movs == 0)
continue;
- fs_inst *sel_inst[MAX_MOVS] = { NULL };
- fs_inst *mov_imm_inst[MAX_MOVS] = { NULL };
-
+ brw::fs_builder ibld = bld.at(block, if_inst);
enum brw_predicate predicate;
bool predicate_inverse;
if (brw->gen == 6 && if_inst->conditional_mod) {
- /* For Sandybridge with IF with embedded comparison */
+ /* For Sandybridge with IF with embedded comparison. */
+ ibld.CMP(ibld.reg_null_d(), if_inst->src[0], if_inst->src[1],
+ if_inst->conditional_mod);
predicate = BRW_PREDICATE_NORMAL;
predicate_inverse = false;
} else {
@@ -165,25 +171,8 @@ fs_visitor::opt_peephole_sel()
/* Generate SEL instructions for pairs of MOVs to a common destination. */
for (int i = 0; i < movs; i++) {
- if (!then_mov[i] || !else_mov[i])
- break;
-
- /* Check that the MOVs are the right form. */
- if (!then_mov[i]->dst.equals(else_mov[i]->dst) ||
- then_mov[i]->is_partial_write() ||
- else_mov[i]->is_partial_write()) {
- movs = i;
- break;
- }
-
- /* Check that source types for mov operations match. */
- if (then_mov[i]->src[0].type != else_mov[i]->src[0].type) {
- movs = i;
- break;
- }
-
if (then_mov[i]->src[0].equals(else_mov[i]->src[0])) {
- sel_inst[i] = MOV(then_mov[i]->dst, then_mov[i]->src[0]);
+ ibld.MOV(then_mov[i]->dst, then_mov[i]->src[0]);
} else {
/* Only the last source register can be a constant, so if the MOV
* in the "then" clause uses a constant, we need to put it in a
@@ -193,29 +182,13 @@ fs_visitor::opt_peephole_sel()
if (src0.file == IMM) {
src0 = fs_reg(this, glsl_type::float_type);
src0.type = then_mov[i]->src[0].type;
- mov_imm_inst[i] = MOV(src0, then_mov[i]->src[0]);
+ ibld.MOV(src0, then_mov[i]->src[0]);
}
- sel_inst[i] = SEL(then_mov[i]->dst, src0, else_mov[i]->src[0]);
- sel_inst[i]->predicate = predicate;
- sel_inst[i]->predicate_inverse = predicate_inverse;
+ brw::exec_predicate_inv(
+ predicate, predicate_inverse,
+ ibld.SEL(then_mov[i]->dst, src0, else_mov[i]->src[0]));
}
- }
-
- if (movs == 0)
- continue;
-
- /* Emit a CMP if our IF used the embedded comparison */
- if (brw->gen == 6 && if_inst->conditional_mod) {
- fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
- if_inst->conditional_mod);
- if_inst->insert_before(block, cmp_inst);
- }
-
- for (int i = 0; i < movs; i++) {
- if (mov_imm_inst[i])
- if_inst->insert_before(block, mov_imm_inst[i]);
- if_inst->insert_before(block, sel_inst[i]);
then_mov[i]->remove(then_block);
else_mov[i]->remove(else_block);
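
The net effect of the peephole is unchanged: matching MOV pairs from the
then/else blocks collapse into SELs predicated on the IF's condition and
emitted just before the IF, now via the builder instead of hand-inserted
instructions. Schematically (a sketch of the transform, not actual
disassembly):

   (+f0) IF                     (+f0) SEL dst, a, b
      MOV dst, a                (+f0) IF
   ELSE               ==>       ELSE
      MOV dst, b                ENDIF
   ENDIF

The now-empty IF/ELSE/ENDIF is left for the dead control flow pass to
clean up afterwards.
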
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 034a4830a9b..a898ebbe636 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -37,7 +37,6 @@ extern "C" {
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "util/register_allocate.h"
-#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
@@ -67,8 +66,7 @@ fs_visitor::visit(ir_variable *ir)
reg = emit_general_interpolation(ir);
}
assert(reg);
- hash_table_insert(this->variable_ht, reg, ir);
- return;
+
} else if (ir->data.mode == ir_var_shader_out) {
reg = new(this->mem_ctx) fs_reg(this, ir->type);
@@ -105,35 +103,6 @@ fs_visitor::visit(ir_variable *ir)
this->output_components[output] = vector_elements;
}
}
- } else if (ir->data.mode == ir_var_uniform) {
- int param_index = uniforms;
-
- /* Thanks to the lower_ubo_reference pass, we will see only
- * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
- * variables, so no need for them to be in variable_ht.
- *
- * Some uniforms, such as samplers and atomic counters, have no actual
- * storage, so we should ignore them.
- */
- if (ir->is_in_uniform_block() || type_size(ir->type) == 0)
- return;
-
- if (dispatch_width == 16) {
- if (!variable_storage(ir)) {
- fail("Failed to find uniform '%s' in SIMD16\n", ir->name);
- }
- return;
- }
-
- param_size[param_index] = type_size(ir->type);
- if (!strncmp(ir->name, "gl_", 3)) {
- setup_builtin_uniform_values(ir);
- } else {
- setup_uniform_values(ir);
- }
-
- reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
- reg->type = brw_type_for_base_type(ir->type);
} else if (ir->data.mode == ir_var_system_value) {
if (ir->data.location == SYSTEM_VALUE_SAMPLE_POS) {
@@ -146,199 +115,13 @@ fs_visitor::visit(ir_variable *ir)
fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
BRW_REGISTER_TYPE_D));
}
- }
-
- if (!reg)
- reg = new(this->mem_ctx) fs_reg(this, ir->type);
-
- hash_table_insert(this->variable_ht, reg, ir);
-}
-
-void
-fs_visitor::visit(ir_dereference_variable *ir)
-{
- fs_reg *reg = variable_storage(ir->var);
-
- if (!reg) {
- fail("Failed to find variable storage for %s\n", ir->var->name);
- this->result = fs_reg(reg_null_d);
- return;
- }
- this->result = *reg;
-}
-
-void
-fs_visitor::visit(ir_dereference_record *ir)
-{
- const glsl_type *struct_type = ir->record->type;
-
- ir->record->accept(this);
-
- unsigned int off = 0;
- for (unsigned int i = 0; i < struct_type->length; i++) {
- if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
- break;
- off += type_size(struct_type->fields.structure[i].type);
- }
- this->result = offset(this->result, off);
- this->result.type = brw_type_for_base_type(ir->type);
-}
-
-void
-fs_visitor::visit(ir_dereference_array *ir)
-{
- ir_constant *constant_index;
- fs_reg src;
- int element_size = type_size(ir->type);
-
- constant_index = ir->array_index->as_constant();
-
- ir->array->accept(this);
- src = this->result;
- src.type = brw_type_for_base_type(ir->type);
-
- if (constant_index) {
- assert(src.file == UNIFORM || src.file == GRF || src.file == HW_REG);
- src = offset(src, constant_index->value.i[0] * element_size);
- } else {
- /* Variable index array dereference. We attach the variable index
- * component to the reg as a pointer to a register containing the
- * offset. Currently only uniform arrays are supported in this patch,
- * and that reladdr pointer is resolved by
- * move_uniform_array_access_to_pull_constants(). All other array types
- * are lowered by lower_variable_index_to_cond_assign().
- */
- ir->array_index->accept(this);
-
- fs_reg index_reg;
- index_reg = fs_reg(this, glsl_type::int_type);
- emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size));
-
- if (src.reladdr) {
- emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg);
- }
-
- src.reladdr = ralloc(mem_ctx, fs_reg);
- memcpy(src.reladdr, &index_reg, sizeof(index_reg));
- }
- this->result = src;
-}
-void
-fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y,
- const fs_reg &a)
-{
- if (brw->gen < 6 ||
- !x.is_valid_3src() ||
- !y.is_valid_3src() ||
- !a.is_valid_3src()) {
- /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
- fs_reg y_times_a = fs_reg(this, glsl_type::float_type);
- fs_reg one_minus_a = fs_reg(this, glsl_type::float_type);
- fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type);
-
- emit(MUL(y_times_a, y, a));
-
- fs_reg negative_a = a;
- negative_a.negate = !a.negate;
- emit(ADD(one_minus_a, negative_a, fs_reg(1.0f)));
- emit(MUL(x_times_one_minus_a, x, one_minus_a));
-
- emit(ADD(dst, x_times_one_minus_a, y_times_a));
} else {
- /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
- * we need to reorder the operands.
- */
- emit(LRP(dst, a, y, x));
- }
-}
-
-void
-fs_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
- const fs_reg &src0, const fs_reg &src1)
-{
- fs_inst *inst;
-
- if (brw->gen >= 6) {
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->conditional_mod = conditionalmod;
- } else {
- emit(CMP(reg_null_d, src0, src1, conditionalmod));
-
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-}
-
-bool
-fs_visitor::try_emit_saturate(ir_expression *ir)
-{
- if (ir->operation != ir_unop_saturate)
- return false;
-
- ir_rvalue *sat_val = ir->operands[0];
-
- fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();
-
- sat_val->accept(this);
- fs_reg src = this->result;
-
- fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();
-
- /* If the last instruction from our accept() generated our
- * src, just set the saturate flag instead of emitting a separate mov.
- */
- fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
- if (modify && modify->regs_written == modify->dst.width / 8 &&
- modify->can_do_saturate()) {
- modify->saturate = true;
- this->result = src;
- return true;
- }
-
- return false;
-}
-
-bool
-fs_visitor::try_emit_mad(ir_expression *ir)
-{
- /* 3-src instructions were introduced in gen6. */
- if (brw->gen < 6)
- return false;
-
- /* MAD can only handle floating-point data. */
- if (ir->type != glsl_type::float_type)
- return false;
-
- ir_rvalue *nonmul = ir->operands[1];
- ir_expression *mul = ir->operands[0]->as_expression();
-
- if (!mul || mul->operation != ir_binop_mul) {
- nonmul = ir->operands[0];
- mul = ir->operands[1]->as_expression();
-
- if (!mul || mul->operation != ir_binop_mul)
- return false;
+ backend_visitor::visit(ir);
+ return;
}
- if (nonmul->as_constant() ||
- mul->operands[0]->as_constant() ||
- mul->operands[1]->as_constant())
- return false;
-
- nonmul->accept(this);
- fs_reg src0 = this->result;
-
- mul->operands[0]->accept(this);
- fs_reg src1 = this->result;
-
- mul->operands[1]->accept(this);
- fs_reg src2 = this->result;
-
- this->result = fs_reg(this, ir->type);
- emit(BRW_OPCODE_MAD, this->result, src0, src1, src2);
-
- return true;
+ hash_table_insert(this->variable_ht, reg, ir);
}
static int
@@ -391,7 +174,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
switch (ir->operation) {
case ir_unop_interpolate_at_centroid:
- inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
+ inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
break;
case ir_binop_interpolate_at_sample: {
@@ -399,7 +182,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
assert(sample_num || !"nonconstant sample number should have been lowered.");
unsigned msg_data = sample_num->value.i[0] << 4;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data));
+ inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data));
break;
}
@@ -408,7 +191,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
if (const_offset) {
unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) |
(pack_pixel_offset(const_offset->value.f[1]) << 4);
- inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
+ inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
fs_reg(msg_data));
} else {
/* pack the operands: hw wants offsets as 4 bit signed ints */
@@ -417,8 +200,8 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
fs_reg src2 = src;
for (int i = 0; i < 2; i++) {
fs_reg temp = fs_reg(this, glsl_type::float_type);
- emit(MUL(temp, this->result, fs_reg(16.0f)));
- emit(MOV(src2, temp)); /* float to int */
+ bld.MUL(temp, this->result, fs_reg(16.0f));
+ bld.MOV(src2, temp); /* float to int */
/* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires
* that we support a maximum offset of +0.5, which isn't representable
@@ -433,7 +216,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
* FRAGMENT_INTERPOLATION_OFFSET_BITS"
*/
- fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7));
+ fs_inst *inst = bld.emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7));
inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
src2 = offset(src2, 1);
@@ -441,7 +224,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
}
mlen = 2 * reg_width;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
+ inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
fs_reg(0u));
}
break;
@@ -463,714 +246,17 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
for (int i = 0; i < ir->type->vector_elements; i++) {
int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i;
- emit(FS_OPCODE_LINTERP, res,
+ bld.emit(FS_OPCODE_LINTERP, res,
dst_x, dst_y,
fs_reg(interp_reg(var->data.location, ch)));
res = offset(res, 1);
}
}
-void
-fs_visitor::visit(ir_expression *ir)
-{
- unsigned int operand;
- fs_reg op[3], temp;
- fs_inst *inst;
-
- assert(ir->get_num_operands() <= 3);
-
- if (try_emit_saturate(ir))
- return;
-
- /* Deal with the real oddball stuff first */
- switch (ir->operation) {
- case ir_binop_add:
- if (try_emit_mad(ir))
- return;
- break;
-
- case ir_unop_interpolate_at_centroid:
- case ir_binop_interpolate_at_offset:
- case ir_binop_interpolate_at_sample:
- emit_interpolate_expression(ir);
- return;
-
- default:
- break;
- }
-
- for (operand = 0; operand < ir->get_num_operands(); operand++) {
- ir->operands[operand]->accept(this);
- if (this->result.file == BAD_FILE) {
- fail("Failed to get tree for expression operand:\n");
- ir->operands[operand]->fprint(stderr);
- fprintf(stderr, "\n");
- }
- assert(this->result.is_valid_3src());
- op[operand] = this->result;
-
- /* Matrix expression operands should have been broken down to vector
- * operations already.
- */
- assert(!ir->operands[operand]->type->is_matrix());
- /* And then those vector operands should have been broken down to scalar.
- */
- assert(!ir->operands[operand]->type->is_vector());
- }
-
- /* Storage for our result. If our result goes into an assignment, it will
- * just get copy-propagated out, so no worries.
- */
- this->result = fs_reg(this, ir->type);
-
- switch (ir->operation) {
- case ir_unop_logic_not:
- if (ctx->Const.UniformBooleanTrue != 1) {
- emit(NOT(this->result, op[0]));
- } else {
- emit(XOR(this->result, op[0], fs_reg(1)));
- }
- break;
- case ir_unop_neg:
- op[0].negate = !op[0].negate;
- emit(MOV(this->result, op[0]));
- break;
- case ir_unop_abs:
- op[0].abs = true;
- op[0].negate = false;
- emit(MOV(this->result, op[0]));
- break;
- case ir_unop_sign:
- if (ir->type->is_float()) {
- /* AND(val, 0x80000000) gives the sign bit.
- *
- * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
- * zero.
- */
- emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
-
- op[0].type = BRW_REGISTER_TYPE_UD;
- this->result.type = BRW_REGISTER_TYPE_UD;
- emit(AND(this->result, op[0], fs_reg(0x80000000u)));
-
- inst = emit(OR(this->result, this->result, fs_reg(0x3f800000u)));
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- this->result.type = BRW_REGISTER_TYPE_F;
- } else {
- /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
- * -> non-negative val generates 0x00000000.
- * Predicated OR sets 1 if val is positive.
- */
- emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G));
-
- emit(ASR(this->result, op[0], fs_reg(31)));
-
- inst = emit(OR(this->result, this->result, fs_reg(1)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
- break;
- case ir_unop_rcp:
- emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
- break;
-
- case ir_unop_exp2:
- emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
- break;
- case ir_unop_log2:
- emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
- break;
- case ir_unop_exp:
- case ir_unop_log:
- unreachable("not reached: should be handled by ir_explog_to_explog2");
- case ir_unop_sin:
- case ir_unop_sin_reduced:
- emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
- break;
- case ir_unop_cos:
- case ir_unop_cos_reduced:
- emit_math(SHADER_OPCODE_COS, this->result, op[0]);
- break;
-
- case ir_unop_dFdx:
- emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_BY_HINT));
- break;
- case ir_unop_dFdx_coarse:
- emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_COARSE));
- break;
- case ir_unop_dFdx_fine:
- emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_FINE));
- break;
- case ir_unop_dFdy:
- emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_BY_HINT));
- break;
- case ir_unop_dFdy_coarse:
- emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_COARSE));
- break;
- case ir_unop_dFdy_fine:
- emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_FINE));
- break;
-
- case ir_binop_add:
- emit(ADD(this->result, op[0], op[1]));
- break;
- case ir_binop_sub:
- unreachable("not reached: should be handled by ir_sub_to_add_neg");
-
- case ir_binop_mul:
- if (brw->gen < 8 && ir->type->is_integer()) {
- /* For integer multiplication, the MUL uses the low 16 bits
- * of one of the operands (src0 on gen6, src1 on gen7). The
- * MACH accumulates in the contribution of the upper 16 bits
- * of that operand.
- */
- if (ir->operands[0]->is_uint16_constant()) {
- if (brw->gen < 7)
- emit(MUL(this->result, op[0], op[1]));
- else
- emit(MUL(this->result, op[1], op[0]));
- } else if (ir->operands[1]->is_uint16_constant()) {
- if (brw->gen < 7)
- emit(MUL(this->result, op[1], op[0]));
- else
- emit(MUL(this->result, op[0], op[1]));
- } else {
- if (brw->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- this->result.type);
-
- emit(MUL(acc, op[0], op[1]));
- emit(MACH(reg_null_d, op[0], op[1]));
- emit(MOV(this->result, fs_reg(acc)));
- }
- } else {
- emit(MUL(this->result, op[0], op[1]));
- }
- break;
- case ir_binop_imul_high: {
- if (brw->gen == 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- this->result.type);
-
- fs_inst *mul = emit(MUL(acc, op[0], op[1]));
- emit(MACH(this->result, op[0], op[1]));
-
- /* Until Gen8, integer multiplies read 32 bits from one source and
- * 16 bits from the other, relying on the MACH instruction to
- * generate the high bits of the result.
- *
- * On Gen8, the multiply instruction does a full 32x32-bit multiply,
- * but in order to do a 64x64-bit multiply we have to simulate the
- * previous behavior and then use a MACH instruction.
- *
- * FINISHME: Don't use source modifiers on src1.
- */
- if (brw->gen >= 8) {
- assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
- mul->src[1].type == BRW_REGISTER_TYPE_UD);
- if (mul->src[1].type == BRW_REGISTER_TYPE_D) {
- mul->src[1].type = BRW_REGISTER_TYPE_W;
- } else {
- mul->src[1].type = BRW_REGISTER_TYPE_UW;
- }
- }
-
- break;
- }
- case ir_binop_div:
- /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
- assert(ir->type->is_integer());
- emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
- break;
- case ir_binop_carry: {
- if (brw->gen == 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- BRW_REGISTER_TYPE_UD);
-
- emit(ADDC(reg_null_ud, op[0], op[1]));
- emit(MOV(this->result, fs_reg(acc)));
- break;
- }
- case ir_binop_borrow: {
- if (brw->gen == 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- BRW_REGISTER_TYPE_UD);
-
- emit(SUBB(reg_null_ud, op[0], op[1]));
- emit(MOV(this->result, fs_reg(acc)));
- break;
- }
- case ir_binop_mod:
- /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
- assert(ir->type->is_integer());
- emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
- break;
-
- case ir_binop_less:
- case ir_binop_greater:
- case ir_binop_lequal:
- case ir_binop_gequal:
- case ir_binop_equal:
- case ir_binop_all_equal:
- case ir_binop_nequal:
- case ir_binop_any_nequal:
- if (ctx->Const.UniformBooleanTrue == 1) {
- resolve_bool_comparison(ir->operands[0], &op[0]);
- resolve_bool_comparison(ir->operands[1], &op[1]);
- }
-
- emit(CMP(this->result, op[0], op[1],
- brw_conditional_for_comparison(ir->operation)));
- break;
-
- case ir_binop_logic_xor:
- emit(XOR(this->result, op[0], op[1]));
- break;
-
- case ir_binop_logic_or:
- emit(OR(this->result, op[0], op[1]));
- break;
-
- case ir_binop_logic_and:
- emit(AND(this->result, op[0], op[1]));
- break;
-
- case ir_binop_dot:
- case ir_unop_any:
- unreachable("not reached: should be handled by brw_fs_channel_expressions");
-
- case ir_unop_noise:
- unreachable("not reached: should be handled by lower_noise");
-
- case ir_quadop_vector:
- unreachable("not reached: should be handled by lower_quadop_vector");
-
- case ir_binop_vector_extract:
- unreachable("not reached: should be handled by lower_vec_index_to_cond_assign()");
-
- case ir_triop_vector_insert:
- unreachable("not reached: should be handled by lower_vector_insert()");
-
- case ir_binop_ldexp:
- unreachable("not reached: should be handled by ldexp_to_arith()");
-
- case ir_unop_sqrt:
- emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
- break;
-
- case ir_unop_rsq:
- emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
- break;
-
- case ir_unop_bitcast_i2f:
- case ir_unop_bitcast_u2f:
- op[0].type = BRW_REGISTER_TYPE_F;
- this->result = op[0];
- break;
- case ir_unop_i2u:
- case ir_unop_bitcast_f2u:
- op[0].type = BRW_REGISTER_TYPE_UD;
- this->result = op[0];
- break;
- case ir_unop_u2i:
- case ir_unop_bitcast_f2i:
- op[0].type = BRW_REGISTER_TYPE_D;
- this->result = op[0];
- break;
- case ir_unop_i2f:
- case ir_unop_u2f:
- case ir_unop_f2i:
- case ir_unop_f2u:
- emit(MOV(this->result, op[0]));
- break;
-
- case ir_unop_b2i:
- emit(AND(this->result, op[0], fs_reg(1)));
- break;
- case ir_unop_b2f:
- if (ctx->Const.UniformBooleanTrue != 1) {
- op[0].type = BRW_REGISTER_TYPE_UD;
- this->result.type = BRW_REGISTER_TYPE_UD;
- emit(AND(this->result, op[0], fs_reg(0x3f800000u)));
- this->result.type = BRW_REGISTER_TYPE_F;
- } else {
- temp = fs_reg(this, glsl_type::int_type);
- emit(AND(temp, op[0], fs_reg(1)));
- emit(MOV(this->result, temp));
- }
- break;
-
- case ir_unop_f2b:
- emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
- break;
- case ir_unop_i2b:
- emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
- break;
-
- case ir_unop_trunc:
- emit(RNDZ(this->result, op[0]));
- break;
- case ir_unop_ceil:
- op[0].negate = !op[0].negate;
- emit(RNDD(this->result, op[0]));
- this->result.negate = true;
- break;
- case ir_unop_floor:
- emit(RNDD(this->result, op[0]));
- break;
- case ir_unop_fract:
- emit(FRC(this->result, op[0]));
- break;
- case ir_unop_round_even:
- emit(RNDE(this->result, op[0]));
- break;
-
- case ir_binop_min:
- case ir_binop_max:
- resolve_ud_negate(&op[0]);
- resolve_ud_negate(&op[1]);
- emit_minmax(ir->operation == ir_binop_min ?
- BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
- this->result, op[0], op[1]);
- break;
- case ir_unop_pack_snorm_2x16:
- case ir_unop_pack_snorm_4x8:
- case ir_unop_pack_unorm_2x16:
- case ir_unop_pack_unorm_4x8:
- case ir_unop_unpack_snorm_2x16:
- case ir_unop_unpack_snorm_4x8:
- case ir_unop_unpack_unorm_2x16:
- case ir_unop_unpack_unorm_4x8:
- case ir_unop_unpack_half_2x16:
- case ir_unop_pack_half_2x16:
- unreachable("not reached: should be handled by lower_packing_builtins");
- case ir_unop_unpack_half_2x16_split_x:
- emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]);
- break;
- case ir_unop_unpack_half_2x16_split_y:
- emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]);
- break;
- case ir_binop_pow:
- emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
- break;
-
- case ir_unop_bitfield_reverse:
- emit(BFREV(this->result, op[0]));
- break;
- case ir_unop_bit_count:
- emit(CBIT(this->result, op[0]));
- break;
- case ir_unop_find_msb:
- temp = fs_reg(this, glsl_type::uint_type);
- emit(FBH(temp, op[0]));
-
- /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
- * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
- * subtract the result from 31 to convert the MSB count into an LSB count.
- */
-
- /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
- emit(MOV(this->result, temp));
- emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));
-
- temp.negate = true;
- inst = emit(ADD(this->result, temp, fs_reg(31)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- break;
- case ir_unop_find_lsb:
- emit(FBL(this->result, op[0]));
- break;
- case ir_unop_saturate:
- inst = emit(MOV(this->result, op[0]));
- inst->saturate = true;
- break;
- case ir_triop_bitfield_extract:
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- emit(BFE(this->result, op[2], op[1], op[0]));
- break;
- case ir_binop_bfm:
- emit(BFI1(this->result, op[0], op[1]));
- break;
- case ir_triop_bfi:
- emit(BFI2(this->result, op[0], op[1], op[2]));
- break;
- case ir_quadop_bitfield_insert:
- unreachable("not reached: should be handled by "
- "lower_instructions::bitfield_insert_to_bfm_bfi");
-
- case ir_unop_bit_not:
- emit(NOT(this->result, op[0]));
- break;
- case ir_binop_bit_and:
- emit(AND(this->result, op[0], op[1]));
- break;
- case ir_binop_bit_xor:
- emit(XOR(this->result, op[0], op[1]));
- break;
- case ir_binop_bit_or:
- emit(OR(this->result, op[0], op[1]));
- break;
-
- case ir_binop_lshift:
- emit(SHL(this->result, op[0], op[1]));
- break;
-
- case ir_binop_rshift:
- if (ir->type->base_type == GLSL_TYPE_INT)
- emit(ASR(this->result, op[0], op[1]));
- else
- emit(SHR(this->result, op[0], op[1]));
- break;
- case ir_binop_pack_half_2x16_split:
- emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
- break;
- case ir_binop_ubo_load: {
- /* This IR node takes a constant uniform block and a constant or
- * variable byte offset within the block and loads a vector from that.
- */
- ir_constant *const_uniform_block = ir->operands[0]->as_constant();
- ir_constant *const_offset = ir->operands[1]->as_constant();
- fs_reg surf_index;
-
- if (const_uniform_block) {
- /* The block index is a constant, so just emit the binding table entry
- * as an immediate.
- */
- surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
- const_uniform_block->value.u[0]);
- } else {
- /* The block index is not a constant. Evaluate the index expression
- * per-channel and add the base UBO index; the generator will select
- * a value from any live channel.
- */
- surf_index = fs_reg(this, glsl_type::uint_type);
- emit(ADD(surf_index, op[0],
- fs_reg(stage_prog_data->binding_table.ubo_start)))
- ->force_writemask_all = true;
-
- /* Assume this may touch any UBO. It would be nice to provide
- * a tighter bound, but the array information is already lowered away.
- */
- brw_mark_surface_used(prog_data,
- stage_prog_data->binding_table.ubo_start +
- shader_prog->NumUniformBlocks - 1);
- }
-
- if (const_offset) {
- fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
- packed_consts.type = result.type;
-
- fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
- emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
- packed_consts, surf_index, const_offset_reg));
-
- for (int i = 0; i < ir->type->vector_elements; i++) {
- packed_consts.set_smear(const_offset->value.u[0] % 16 / 4 + i);
-
- /* The std140 packing rules don't allow vectors to cross 16-byte
- * boundaries, and a reg is 32 bytes.
- */
- assert(packed_consts.subreg_offset < 32);
-
- /* UBO bools are any nonzero value. We consider bools to be
- * values with the low bit set to 1. Convert them using CMP.
- */
- if (ir->type->base_type == GLSL_TYPE_BOOL) {
- emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ));
- } else {
- emit(MOV(result, packed_consts));
- }
-
- result = offset(result, 1);
- }
- } else {
- /* Turn the byte offset into a dword offset. */
- fs_reg base_offset = fs_reg(this, glsl_type::int_type);
- emit(SHR(base_offset, op[1], fs_reg(2)));
-
- for (int i = 0; i < ir->type->vector_elements; i++) {
- emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index,
- base_offset, i));
-
- if (ir->type->base_type == GLSL_TYPE_BOOL)
- emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));
-
- result = offset(result, 1);
- }
- }
-
- result.reg_offset = 0;
- break;
- }
-
- case ir_triop_fma:
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- emit(MAD(this->result, op[2], op[1], op[0]));
- break;
-
- case ir_triop_lrp:
- emit_lrp(this->result, op[0], op[1], op[2]);
- break;
-
- case ir_triop_csel:
- emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
- inst = emit(BRW_OPCODE_SEL, this->result, op[1], op[2]);
- inst->predicate = BRW_PREDICATE_NORMAL;
- break;
-
- case ir_unop_interpolate_at_centroid:
- case ir_binop_interpolate_at_offset:
- case ir_binop_interpolate_at_sample:
- unreachable("already handled above");
- break;
- }
-}
-
-void
-fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
- const glsl_type *type, bool predicated)
-{
- switch (type->base_type) {
- case GLSL_TYPE_FLOAT:
- case GLSL_TYPE_UINT:
- case GLSL_TYPE_INT:
- case GLSL_TYPE_BOOL:
- for (unsigned int i = 0; i < type->components(); i++) {
- l.type = brw_type_for_base_type(type);
- r.type = brw_type_for_base_type(type);
-
- if (predicated || !l.equals(r)) {
- fs_inst *inst = emit(MOV(l, r));
- inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE;
- }
-
- l = offset(l, 1);
- r = offset(r, 1);
- }
- break;
- case GLSL_TYPE_ARRAY:
- for (unsigned int i = 0; i < type->length; i++) {
- emit_assignment_writes(l, r, type->fields.array, predicated);
- }
- break;
-
- case GLSL_TYPE_STRUCT:
- for (unsigned int i = 0; i < type->length; i++) {
- emit_assignment_writes(l, r, type->fields.structure[i].type,
- predicated);
- }
- break;
-
- case GLSL_TYPE_SAMPLER:
- case GLSL_TYPE_IMAGE:
- case GLSL_TYPE_ATOMIC_UINT:
- break;
-
- case GLSL_TYPE_VOID:
- case GLSL_TYPE_ERROR:
- case GLSL_TYPE_INTERFACE:
- unreachable("not reached");
- }
-}
-
-/* If the RHS processing resulted in an instruction generating a
- * temporary value, and it would be easy to rewrite the instruction to
- * generate its result right into the LHS instead, do so. This ends
- * up reliably removing instructions where it can be tricky to do so
- * later without real UD chain information.
- */
-bool
-fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
- fs_reg dst,
- fs_reg src,
- fs_inst *pre_rhs_inst,
- fs_inst *last_rhs_inst)
-{
- /* Only attempt if we're doing a direct assignment. */
- if (ir->condition ||
- !(ir->lhs->type->is_scalar() ||
- (ir->lhs->type->is_vector() &&
- ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1)))
- return false;
-
- /* Make sure the last instruction generated our source reg. */
- fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
- last_rhs_inst,
- src);
- if (!modify)
- return false;
-
- /* If last_rhs_inst wrote a different number of components than our LHS,
- * we can't safely rewrite it.
- */
- if (alloc.sizes[dst.reg] != modify->regs_written)
- return false;
-
- /* Success! Rewrite the instruction. */
- modify->dst = dst;
-
- return true;
-}
-
-void
-fs_visitor::visit(ir_assignment *ir)
-{
- fs_reg l, r;
- fs_inst *inst;
-
- /* FINISHME: arrays on the lhs */
- ir->lhs->accept(this);
- l = this->result;
-
- fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail();
-
- ir->rhs->accept(this);
- r = this->result;
-
- fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail();
-
- assert(l.file != BAD_FILE);
- assert(r.file != BAD_FILE);
-
- if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
- return;
-
- if (ir->condition) {
- emit_bool_to_cond_code(ir->condition);
- }
-
- if (ir->lhs->type->is_scalar() ||
- ir->lhs->type->is_vector()) {
- for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
- if (ir->write_mask & (1 << i)) {
- inst = emit(MOV(l, r));
- if (ir->condition)
- inst->predicate = BRW_PREDICATE_NORMAL;
- r = offset(r, 1);
- }
- l = offset(l, 1);
- }
- } else {
- emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
- }
-}
-
fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
- fs_reg shadow_c, fs_reg lod, fs_reg dPdy,
- uint32_t sampler)
+ const fs_reg &shadow_c, fs_reg lod, fs_reg lod2,
+ const fs_reg &sampler)
{
int mlen;
int base_mrf = 1;
@@ -1182,7 +268,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
if (shadow_c.file != BAD_FILE) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
coordinate = offset(coordinate, 1);
}
@@ -1190,7 +276,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
* the unused slots must be zeroed.
*/
for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f));
}
mlen += 3;
@@ -1198,25 +284,25 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
/* There's no plain shadow compare message, so we use shadow
* compare with a bias of 0.0.
*/
- emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
mlen++;
} else if (ir->op == ir_txb || ir->op == ir_txl) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen), lod);
mlen++;
} else {
unreachable("Should not get here.");
}
- emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen), shadow_c);
mlen++;
} else if (ir->op == ir_tex) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
coordinate = offset(coordinate, 1);
}
/* zero the others. */
for (int i = ir->coordinate->type->vector_elements; i<3; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f));
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
mlen += 3;
@@ -1224,7 +310,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
fs_reg &dPdx = lod;
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
coordinate = offset(coordinate, 1);
}
/* the slots for u and v are always present, but r is optional */
@@ -1245,20 +331,20 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
* m5 m6 m7 m8 m9 m10
*/
for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdx);
dPdx = offset(dPdx, 1);
}
mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);
for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy));
- dPdy = offset(dPdy, 1);
+ bld.MOV(fs_reg(MRF, base_mrf + mlen), lod2);
+ lod2 = offset(lod2, 1);
}
mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
} else if (ir->op == ir_txs) {
/* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */
simd16 = true;
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod);
mlen += 2;
} else {
/* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
@@ -1268,8 +354,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf);
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
- coordinate));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
+ coordinate);
coordinate = offset(coordinate, 1);
}
@@ -1277,13 +363,13 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
* be necessary for TXF (ld), but seems wise to do for all messages.
*/
for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f));
}
/* lod/bias appears after u/v/r. */
mlen += 6;
- emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod);
mlen++;
/* The unused upper half. */
@@ -1315,7 +401,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
unreachable("not reached");
}
- fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
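+   /* The sampler index is now passed through as a register rather than an
+    * immediate, so all three hardware generations can share the single
+    * emit_texture() entry point below; a constant index simply arrives as
+    * an immediate fs_reg.
+    */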
+ fs_inst *inst = bld.emit(opcode, dst, reg_undef, sampler);
inst->base_mrf = base_mrf;
inst->mlen = mlen;
inst->header_present = true;
@@ -1323,7 +409,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
if (simd16) {
for (int i = 0; i < 4; i++) {
- emit(MOV(orig_dst, dst));
+ bld.MOV(orig_dst, dst);
orig_dst = offset(orig_dst, 1);
dst = offset(dst, 2);
}
@@ -1341,9 +427,9 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
* surprising in the disassembly.
*/
fs_inst *
-fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
- fs_reg shadow_c, fs_reg lod, fs_reg lod2,
- fs_reg sample_index, uint32_t sampler)
+fs_visitor::emit_texture_gen5(ir_texture *ir, const fs_reg &dst, fs_reg coordinate,
+ const fs_reg &shadow_c, fs_reg lod, fs_reg lod2,
+ const fs_reg &sample_index, const fs_reg &sampler)
{
int reg_width = dispatch_width / 8;
bool header_present = false;
@@ -1362,7 +448,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
}
for (int i = 0; i < vector_elements; i++) {
- emit(MOV(retype(offset(msg_coords, i), coordinate.type), coordinate));
+ bld.MOV(retype(offset(msg_coords, i), coordinate.type), coordinate);
coordinate = offset(coordinate, 1);
}
fs_reg msg_end = offset(msg_coords, vector_elements);
@@ -1370,7 +456,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
if (shadow_c.file != BAD_FILE) {
fs_reg msg_shadow = msg_lod;
- emit(MOV(msg_shadow, shadow_c));
+ bld.MOV(msg_shadow, shadow_c);
msg_lod = offset(msg_shadow, 1);
msg_end = msg_lod;
}
@@ -1381,13 +467,13 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
opcode = SHADER_OPCODE_TEX;
break;
case ir_txb:
- emit(MOV(msg_lod, lod));
+ bld.MOV(msg_lod, lod);
msg_end = offset(msg_lod, 1);
opcode = FS_OPCODE_TXB;
break;
case ir_txl:
- emit(MOV(msg_lod, lod));
+ bld.MOV(msg_lod, lod);
msg_end = offset(msg_lod, 1);
opcode = SHADER_OPCODE_TXL;
@@ -1404,11 +490,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
*/
msg_end = msg_lod;
for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
- emit(MOV(msg_end, lod));
+ bld.MOV(msg_end, lod);
lod = offset(lod, 1);
msg_end = offset(msg_end, 1);
- emit(MOV(msg_end, lod2));
+ bld.MOV(msg_end, lod2);
lod2 = offset(lod2, 1);
msg_end = offset(msg_end, 1);
}
@@ -1418,21 +504,21 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
}
case ir_txs:
msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD);
- emit(MOV(msg_lod, lod));
+ bld.MOV(msg_lod, lod);
msg_end = offset(msg_lod, 1);
opcode = SHADER_OPCODE_TXS;
break;
case ir_query_levels:
msg_lod = msg_end;
- emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
msg_end = offset(msg_lod, 1);
opcode = SHADER_OPCODE_TXS;
break;
case ir_txf:
msg_lod = offset(msg_coords, 3);
- emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod));
+ bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod);
msg_end = offset(msg_lod, 1);
opcode = SHADER_OPCODE_TXF;
@@ -1440,9 +526,9 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_txf_ms:
msg_lod = offset(msg_coords, 3);
/* lod */
- emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
/* sample index */
- emit(MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index));
+ bld.MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index);
msg_end = offset(msg_lod, 2);
opcode = SHADER_OPCODE_TXF_CMS;
@@ -1457,7 +543,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
unreachable("not reached");
}
- fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
+ fs_inst *inst = bld.emit(opcode, dst, reg_undef, sampler);
inst->base_mrf = message.reg;
inst->mlen = msg_end.reg - message.reg;
inst->header_present = header_present;
@@ -1481,14 +567,15 @@ is_high_sampler(struct brw_context *brw, fs_reg sampler)
}
fs_inst *
-fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
- fs_reg shadow_c, fs_reg lod, fs_reg lod2,
- fs_reg sample_index, fs_reg mcs, fs_reg sampler)
+fs_visitor::emit_texture_gen7(ir_texture *ir, const fs_reg &dst, fs_reg coordinate,
+ const fs_reg &shadow_c, fs_reg lod, fs_reg lod2,
+ fs_reg offset_val, const fs_reg &sample_index,
+ const fs_reg &mcs, const fs_reg &sampler)
{
- int reg_width = dispatch_width / 8;
+ int reg_width = bld.dispatch_width() / 8;
bool header_present = false;
-
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE);
+
for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) {
sources[i] = fs_reg(this, glsl_type::float_type);
}
@@ -1512,7 +599,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
}
if (shadow_c.file != BAD_FILE) {
- emit(MOV(sources[length], shadow_c));
+ bld.MOV(sources[length], shadow_c);
length++;
}
@@ -1525,11 +612,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_lod:
break;
case ir_txb:
- emit(MOV(sources[length], lod));
+ bld.MOV(sources[length], lod);
length++;
break;
case ir_txl:
- emit(MOV(sources[length], lod));
+ bld.MOV(sources[length], lod);
length++;
break;
case ir_txd: {
@@ -1539,19 +626,19 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
* [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(sources[length], coordinate));
- coordinate = offset(coordinate, 1);
- length++;
+ bld.MOV(sources[length], coordinate);
+ coordinate = offset(coordinate, 1);
+ length++;
/* For cube map array, the coordinate is (u,v,r,ai) but there are
* only derivatives for (u, v, r).
*/
if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
- emit(MOV(sources[length], lod));
+ bld.MOV(sources[length], lod);
lod = offset(lod, 1);
length++;
- emit(MOV(sources[length], lod2));
+ bld.MOV(sources[length], lod2);
lod2 = offset(lod2, 1);
length++;
}
@@ -1561,43 +648,43 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
break;
}
case ir_txs:
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod);
length++;
break;
case ir_query_levels:
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u));
length++;
break;
case ir_txf:
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
coordinate = offset(coordinate, 1);
length++;
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
length++;
for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
- coordinate = offset(coordinate, 1);
- length++;
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
+ coordinate = offset(coordinate, 1);
+ length++;
}
coordinate_done = true;
break;
case ir_txf_ms:
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
length++;
/* data from the multisample control surface */
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
length++;
/* there is no offsetting for this message; just copy in the integer
* texture coordinates
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
coordinate = offset(coordinate, 1);
length++;
}
@@ -1610,23 +697,20 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
no16("Gen7 does not support gather4_po_c in SIMD16 mode.");
/* More crazy intermixing */
- ir->offset->accept(this);
- fs_reg offset_value = this->result;
-
for (int i = 0; i < 2; i++) { /* u, v */
- emit(MOV(sources[length], coordinate));
+ bld.MOV(sources[length], coordinate);
coordinate = offset(coordinate, 1);
length++;
}
for (int i = 0; i < 2; i++) { /* offu, offv */
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value));
- offset_value = offset(offset_value, 1);
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_val);
+ offset_val = offset(offset_val, 1);
length++;
}
if (ir->coordinate->type->vector_elements == 3) { /* r if present */
- emit(MOV(sources[length], coordinate));
+ bld.MOV(sources[length], coordinate);
coordinate = offset(coordinate, 1);
length++;
}
@@ -1639,7 +723,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
/* Set up the coordinate (except for cases where it was done above) */
if (ir->coordinate && !coordinate_done) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(sources[length], coordinate));
+ bld.MOV(sources[length], coordinate);
coordinate = offset(coordinate, 1);
length++;
}
@@ -1651,9 +735,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
else
mlen = length * reg_width;
- fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
- BRW_REGISTER_TYPE_F);
- emit(LOAD_PAYLOAD(src_payload, sources, length));
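+   /* Gather the individual message parameters into one contiguous payload
+    * register for the sampler SEND.  natural_reg() is assumed to allocate
+    * the mlen registers that the explicit alloc.allocate() call used to.
+    */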
+ fs_reg payload = bld.natural_reg(BRW_REGISTER_TYPE_F, mlen);
+ bld.LOAD_PAYLOAD(payload, sources, length);
/* Generate the SEND */
enum opcode opcode;
@@ -1676,7 +759,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
default:
unreachable("not reached");
}
- fs_inst *inst = emit(opcode, dst, src_payload, sampler);
+ instruction *inst = bld.emit(opcode, dst, payload, sampler);
inst->base_mrf = -1;
inst->mlen = mlen;
inst->header_present = header_present;
@@ -1690,489 +773,22 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
return inst;
}
-fs_reg
-fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
- bool is_rect, uint32_t sampler, int texunit)
-{
- fs_inst *inst = NULL;
- bool needs_gl_clamp = true;
- fs_reg scale_x, scale_y;
- const struct brw_sampler_prog_key_data *tex =
- (stage == MESA_SHADER_FRAGMENT) ?
- &((brw_wm_prog_key*) this->key)->tex : NULL;
- assert(tex);
-
- /* The 965 requires the EU to do the normalization of GL rectangle
- * texture coordinates. We use the program parameter state
- * tracking to get the scaling factor.
- */
- if (is_rect &&
- (brw->gen < 6 ||
- (brw->gen >= 6 && (tex->gl_clamp_mask[0] & (1 << sampler) ||
- tex->gl_clamp_mask[1] & (1 << sampler))))) {
- struct gl_program_parameter_list *params = prog->Parameters;
- int tokens[STATE_LENGTH] = {
- STATE_INTERNAL,
- STATE_TEXRECT_SCALE,
- texunit,
- 0,
- 0
- };
-
- no16("rectangle scale uniform setup not supported on SIMD16\n");
- if (dispatch_width == 16) {
- return coordinate;
- }
-
- GLuint index = _mesa_add_state_reference(params,
- (gl_state_index *)tokens);
- /* Try to find existing copies of the texrect scale uniforms. */
- for (unsigned i = 0; i < uniforms; i++) {
- if (stage_prog_data->param[i] ==
- &prog->Parameters->ParameterValues[index][0]) {
- scale_x = fs_reg(UNIFORM, i);
- scale_y = fs_reg(UNIFORM, i + 1);
- break;
- }
- }
-
- /* If we didn't already set them up, do so now. */
- if (scale_x.file == BAD_FILE) {
- scale_x = fs_reg(UNIFORM, uniforms);
- scale_y = fs_reg(UNIFORM, uniforms + 1);
-
- stage_prog_data->param[uniforms++] =
- &prog->Parameters->ParameterValues[index][0];
- stage_prog_data->param[uniforms++] =
- &prog->Parameters->ParameterValues[index][1];
- }
- }
-
- /* The 965 requires the EU to do the normalization of GL rectangle
- * texture coordinates. We use the program parameter state
- * tracking to get the scaling factor.
- */
- if (brw->gen < 6 && is_rect) {
- fs_reg dst = fs_reg(this, ir->coordinate->type);
- fs_reg src = coordinate;
- coordinate = dst;
-
- emit(MUL(dst, src, scale_x));
- dst = offset(dst, 1);
- src = offset(src, 1);
- emit(MUL(dst, src, scale_y));
- } else if (is_rect) {
- /* On gen6+, the sampler handles the rectangle coordinates
- * natively, without needing rescaling. But that means we have
- * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
- * not [0, 1] like the default case below.
- */
- needs_gl_clamp = false;
-
- for (int i = 0; i < 2; i++) {
- if (tex->gl_clamp_mask[i] & (1 << sampler)) {
- fs_reg chan = coordinate;
- chan = offset(chan, i);
-
- inst = emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f));
- inst->conditional_mod = BRW_CONDITIONAL_G;
-
- /* Our parameter comes in as 1.0/width or 1.0/height,
- * because that's what people normally want for doing
- * texture rectangle handling. We need width or height
- * for clamping, but we don't care enough to make a new
- * parameter type, so just invert back.
- */
- fs_reg limit = fs_reg(this, glsl_type::float_type);
- emit(MOV(limit, i == 0 ? scale_x : scale_y));
- emit(SHADER_OPCODE_RCP, limit, limit);
-
- inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
- inst->conditional_mod = BRW_CONDITIONAL_L;
- }
- }
- }
-
- if (ir->coordinate && needs_gl_clamp) {
- for (unsigned int i = 0;
- i < MIN2(ir->coordinate->type->vector_elements, 3); i++) {
- if (tex->gl_clamp_mask[i] & (1 << sampler)) {
- fs_reg chan = coordinate;
- chan = offset(chan, i);
-
- fs_inst *inst = emit(MOV(chan, chan));
- inst->saturate = true;
- }
- }
- }
- return coordinate;
-}
-
-/* Sample from the MCS surface attached to this multisample texture. */
-fs_reg
-fs_visitor::emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, fs_reg sampler)
-{
- int reg_width = dispatch_width / 8;
- int length = ir->coordinate->type->vector_elements;
- fs_reg payload = fs_reg(GRF, alloc.allocate(length * reg_width),
- BRW_REGISTER_TYPE_F);
- fs_reg dest = fs_reg(this, glsl_type::uvec4_type);
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, length);
-
- /* parameters are: u, v, r; missing parameters are treated as zero */
- for (int i = 0; i < length; i++) {
- sources[i] = fs_reg(this, glsl_type::float_type);
- emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate));
- coordinate = offset(coordinate, 1);
- }
-
- emit(LOAD_PAYLOAD(payload, sources, length));
-
- fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler);
- inst->base_mrf = -1;
- inst->mlen = length * reg_width;
- inst->header_present = false;
- inst->regs_written = 4 * reg_width; /* we only care about one reg of
- * response, but the sampler always
- * writes 4/8
- */
-
- return dest;
-}
-
-void
-fs_visitor::visit(ir_texture *ir)
-{
- const struct brw_sampler_prog_key_data *tex =
- (stage == MESA_SHADER_FRAGMENT) ?
- &((brw_wm_prog_key*) this->key)->tex : NULL;
- assert(tex);
- fs_inst *inst = NULL;
-
- uint32_t sampler =
- _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
-
- ir_rvalue *nonconst_sampler_index =
- _mesa_get_sampler_array_nonconst_index(ir->sampler);
-
- /* Handle non-constant sampler array indexing */
- fs_reg sampler_reg;
- if (nonconst_sampler_index) {
- /* The highest sampler which may be used by this operation is
- * the last element of the array. Mark it here, because the generator
- * doesn't have enough information to determine the bound.
- */
- uint32_t array_size = ir->sampler->as_dereference_array()
- ->array->type->array_size();
-
- uint32_t max_used = sampler + array_size - 1;
- if (ir->op == ir_tg4 && brw->gen < 8) {
- max_used += stage_prog_data->binding_table.gather_texture_start;
- } else {
- max_used += stage_prog_data->binding_table.texture_start;
- }
-
- brw_mark_surface_used(prog_data, max_used);
-
- /* Emit code to evaluate the actual indexing expression */
- nonconst_sampler_index->accept(this);
- fs_reg temp(this, glsl_type::uint_type);
- emit(ADD(temp, this->result, fs_reg(sampler)))
- ->force_writemask_all = true;
- sampler_reg = temp;
- } else {
- /* Single sampler, or constant array index; the indexing expression
- * is just an immediate.
- */
- sampler_reg = fs_reg(sampler);
- }
-
- /* FINISHME: We're failing to recompile our programs when the sampler is
- * updated. This only matters for the texture rectangle scale parameters
- * (pre-gen6, or gen6+ with GL_CLAMP).
- */
- int texunit = prog->SamplerUnits[sampler];
-
- if (ir->op == ir_tg4) {
- /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
- * emitting anything other than setting up the constant result.
- */
- ir_constant *chan = ir->lod_info.component->as_constant();
- int swiz = GET_SWZ(tex->swizzles[sampler], chan->value.i[0]);
- if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
-
- fs_reg res = fs_reg(this, glsl_type::vec4_type);
- this->result = res;
-
- for (int i=0; i<4; i++) {
- emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)));
- res = offset(res, 1);
- }
- return;
- }
- }
-
- /* Should be lowered by do_lower_texture_projection */
- assert(!ir->projector);
-
- /* Should be lowered */
- assert(!ir->offset || !ir->offset->type->is_array());
-
- /* Generate code to compute all the subexpression trees. This has to be
- * done before loading any values into MRFs for the sampler message since
- * generating these values may involve SEND messages that need the MRFs.
- */
- fs_reg coordinate;
- if (ir->coordinate) {
- ir->coordinate->accept(this);
-
- coordinate = rescale_texcoord(ir, this->result,
- ir->sampler->type->sampler_dimensionality ==
- GLSL_SAMPLER_DIM_RECT,
- sampler, texunit);
- }
-
- fs_reg shadow_comparitor;
- if (ir->shadow_comparitor) {
- ir->shadow_comparitor->accept(this);
- shadow_comparitor = this->result;
- }
-
- fs_reg lod, lod2, sample_index, mcs;
- switch (ir->op) {
- case ir_tex:
- case ir_lod:
- case ir_tg4:
- case ir_query_levels:
- break;
- case ir_txb:
- ir->lod_info.bias->accept(this);
- lod = this->result;
- break;
- case ir_txd:
- ir->lod_info.grad.dPdx->accept(this);
- lod = this->result;
-
- ir->lod_info.grad.dPdy->accept(this);
- lod2 = this->result;
- break;
- case ir_txf:
- case ir_txl:
- case ir_txs:
- ir->lod_info.lod->accept(this);
- lod = this->result;
- break;
- case ir_txf_ms:
- ir->lod_info.sample_index->accept(this);
- sample_index = this->result;
-
- if (brw->gen >= 7 && tex->compressed_multisample_layout_mask & (1<<sampler))
- mcs = emit_mcs_fetch(ir, coordinate, sampler_reg);
- else
- mcs = fs_reg(0u);
- break;
- default:
- unreachable("Unrecognized texture opcode");
- };
-
- /* Writemasking doesn't eliminate channels on SIMD8 texture
- * samples, so don't worry about them.
- */
- fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
-
- if (brw->gen >= 7) {
- inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
- lod, lod2, sample_index, mcs, sampler_reg);
- } else if (brw->gen >= 5) {
- inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
- lod, lod2, sample_index, sampler);
- } else {
- inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor,
- lod, lod2, sampler);
- }
-
- if (ir->offset != NULL && ir->op != ir_txf)
- inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant());
-
- if (ir->op == ir_tg4)
- inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17
-
- if (ir->shadow_comparitor)
- inst->shadow_compare = true;
-
- /* fixup #layers for cube map arrays */
- if (ir->op == ir_txs) {
- glsl_type const *type = ir->sampler->type;
- if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
- type->sampler_array) {
- fs_reg depth = offset(dst, 2);
- fs_reg fixed_depth = fs_reg(this, glsl_type::int_type);
- emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
-
- fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
- int components = inst->regs_written / (dst.width / 8);
- for (int i = 0; i < components; i++) {
- if (i == 2) {
- fixed_payload[i] = fixed_depth;
- } else {
- fixed_payload[i] = offset(dst, i);
- }
- }
- emit(LOAD_PAYLOAD(dst, fixed_payload, components));
- }
- }
-
- if (brw->gen == 6 && ir->op == ir_tg4) {
- emit_gen6_gather_wa(tex->gen6_gather_wa[sampler], dst);
- }
-
- swizzle_result(ir, dst, sampler);
-}
-
-/**
- * Apply workarounds for Gen6 gather with UINT/SINT
- */
-void
-fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
-{
- if (!wa)
- return;
-
- int width = (wa & WA_8BIT) ? 8 : 16;
-
- for (int i = 0; i < 4; i++) {
- fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F);
- /* Convert from UNORM to UINT */
- emit(MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1))));
- emit(MOV(dst, dst_f));
-
- if (wa & WA_SIGN) {
- /* Reinterpret the UINT value as a signed INT value by
- * shifting the sign bit into place, then shifting back
- * preserving sign.
- */
- emit(SHL(dst, dst, fs_reg(32 - width)));
- emit(ASR(dst, dst, fs_reg(32 - width)));
- }
-
- dst = offset(dst, 1);
- }
-}
-
-/**
- * Set up the gather channel based on the swizzle, for gather4.
- */
-uint32_t
-fs_visitor::gather_channel(ir_texture *ir, uint32_t sampler)
-{
- const struct brw_sampler_prog_key_data *tex =
- (stage == MESA_SHADER_FRAGMENT) ?
- &((brw_wm_prog_key*) this->key)->tex : NULL;
- assert(tex);
- ir_constant *chan = ir->lod_info.component->as_constant();
- int swiz = GET_SWZ(tex->swizzles[sampler], chan->value.i[0]);
- switch (swiz) {
- case SWIZZLE_X: return 0;
- case SWIZZLE_Y:
- /* gather4 sampler is broken for green channel on RG32F --
- * we must ask for blue instead.
- */
- if (tex->gather_channel_quirk_mask & (1<<sampler))
- return 2;
- return 1;
- case SWIZZLE_Z: return 2;
- case SWIZZLE_W: return 3;
- default:
- unreachable("Not reached"); /* zero, one swizzles handled already */
- }
-}
-
-/**
- * Swizzle the result of a texture result. This is necessary for
- * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
- */
-void
-fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, uint32_t sampler)
-{
- if (ir->op == ir_query_levels) {
- /* # levels is in .w */
- this->result = offset(orig_val, 3);
- return;
- }
-
- this->result = orig_val;
-
- /* txs,lod don't actually sample the texture, so swizzling the result
- * makes no sense.
- */
- if (ir->op == ir_txs || ir->op == ir_lod || ir->op == ir_tg4)
- return;
-
- const struct brw_sampler_prog_key_data *tex =
- (stage == MESA_SHADER_FRAGMENT) ?
- &((brw_wm_prog_key*) this->key)->tex : NULL;
- assert(tex);
-
- if (ir->type == glsl_type::float_type) {
- /* Ignore DEPTH_TEXTURE_MODE swizzling. */
- assert(ir->sampler->type->sampler_shadow);
- } else if (tex->swizzles[sampler] != SWIZZLE_NOOP) {
- fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type);
-
- for (int i = 0; i < 4; i++) {
- int swiz = GET_SWZ(tex->swizzles[sampler], i);
- fs_reg l = swizzled_result;
- l = offset(l, i);
-
- if (swiz == SWIZZLE_ZERO) {
- emit(MOV(l, fs_reg(0.0f)));
- } else if (swiz == SWIZZLE_ONE) {
- emit(MOV(l, fs_reg(1.0f)));
- } else {
- emit(MOV(l, offset(orig_val,
- GET_SWZ(tex->swizzles[sampler], i))));
- }
- }
- this->result = swizzled_result;
- }
-}
-
-void
-fs_visitor::visit(ir_swizzle *ir)
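+/**
+ * Emit the sampler message for a texture operation, dispatching to the
+ * hardware generation-specific implementation.  All message parameters are
+ * expected to have been evaluated into registers by the caller.
+ */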
+fs_inst *
+fs_visitor::emit_texture(ir_texture *ir, const fs_reg &dst,
+ const fs_reg &coordinate, const fs_reg &shadow_c,
+ const fs_reg &lod, const fs_reg &lod2,
+ const fs_reg &offset_val, const fs_reg &sample_index,
+ const fs_reg &mcs, const fs_reg &sampler)
{
- ir->val->accept(this);
- fs_reg val = this->result;
-
- if (ir->type->vector_elements == 1) {
- this->result = offset(this->result, ir->mask.x);
- return;
- }
-
- fs_reg result = fs_reg(this, ir->type);
- this->result = result;
-
- for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
- fs_reg channel = val;
- int swiz = 0;
-
- switch (i) {
- case 0:
- swiz = ir->mask.x;
- break;
- case 1:
- swiz = ir->mask.y;
- break;
- case 2:
- swiz = ir->mask.z;
- break;
- case 3:
- swiz = ir->mask.w;
- break;
- }
-
- emit(MOV(result, offset(channel, swiz)));
- result = offset(result, 1);
- }
+ if (brw->gen >= 7)
+ return emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, lod2,
+ offset_val, sample_index, mcs, sampler);
+ else if (brw->gen >= 5)
+ return emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, lod2,
+ sample_index, sampler);
+ else
+ return emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, lod2,
+ sampler);
}
void
@@ -2187,8 +803,8 @@ fs_visitor::visit(ir_discard *ir)
*/
fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UW));
- fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
- BRW_CONDITIONAL_NZ));
+ fs_inst *cmp = bld.CMP(bld.reg_null_f(), some_reg, some_reg,
+ BRW_CONDITIONAL_NZ);
cmp->predicate = BRW_PREDICATE_NORMAL;
cmp->flag_subreg = 1;
@@ -2196,7 +812,7 @@ fs_visitor::visit(ir_discard *ir)
/* For performance, after a discard, jump to the end of the shader.
* Only jump if all relevant channels have been discarded.
*/
- fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
+ fs_inst *discard_jump = bld.emit(FS_OPCODE_DISCARD_JUMP);
discard_jump->flag_subreg = 1;
discard_jump->predicate = (dispatch_width == 8)
@@ -2206,292 +822,6 @@ fs_visitor::visit(ir_discard *ir)
}
}
-void
-fs_visitor::visit(ir_constant *ir)
-{
- /* Set this->result to reg at the bottom of the function because some code
- * paths will cause this visitor to be applied to other fields. This will
- * cause the value stored in this->result to be modified.
- *
- * Make reg constant so that it doesn't get accidentally modified along the
- * way. Yes, I actually had this problem. :(
- */
- const fs_reg reg(this, ir->type);
- fs_reg dst_reg = reg;
-
- if (ir->type->is_array()) {
- const unsigned size = type_size(ir->type->fields.array);
-
- for (unsigned i = 0; i < ir->type->length; i++) {
- ir->array_elements[i]->accept(this);
- fs_reg src_reg = this->result;
-
- dst_reg.type = src_reg.type;
- for (unsigned j = 0; j < size; j++) {
- emit(MOV(dst_reg, src_reg));
- src_reg = offset(src_reg, 1);
- dst_reg = offset(dst_reg, 1);
- }
- }
- } else if (ir->type->is_record()) {
- foreach_in_list(ir_constant, field, &ir->components) {
- const unsigned size = type_size(field->type);
-
- field->accept(this);
- fs_reg src_reg = this->result;
-
- dst_reg.type = src_reg.type;
- for (unsigned j = 0; j < size; j++) {
- emit(MOV(dst_reg, src_reg));
- src_reg = offset(src_reg, 1);
- dst_reg = offset(dst_reg, 1);
- }
- }
- } else {
- const unsigned size = type_size(ir->type);
-
- for (unsigned i = 0; i < size; i++) {
- switch (ir->type->base_type) {
- case GLSL_TYPE_FLOAT:
- emit(MOV(dst_reg, fs_reg(ir->value.f[i])));
- break;
- case GLSL_TYPE_UINT:
- emit(MOV(dst_reg, fs_reg(ir->value.u[i])));
- break;
- case GLSL_TYPE_INT:
- emit(MOV(dst_reg, fs_reg(ir->value.i[i])));
- break;
- case GLSL_TYPE_BOOL:
- emit(MOV(dst_reg,
- fs_reg(ir->value.b[i] != 0 ? ctx->Const.UniformBooleanTrue
- : 0)));
- break;
- default:
- unreachable("Non-float/uint/int/bool constant");
- }
- dst_reg = offset(dst_reg, 1);
- }
- }
-
- this->result = reg;
-}
-
-void
-fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
-{
- ir_expression *expr = ir->as_expression();
-
- if (!expr || expr->operation == ir_binop_ubo_load) {
- ir->accept(this);
-
- fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- return;
- }
-
- fs_reg op[3];
- fs_inst *inst;
-
- assert(expr->get_num_operands() <= 3);
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- assert(expr->operands[i]->type->is_scalar());
-
- expr->operands[i]->accept(this);
- op[i] = this->result;
-
- resolve_ud_negate(&op[i]);
- }
-
- switch (expr->operation) {
- case ir_unop_logic_not:
- inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- break;
-
- case ir_binop_logic_xor:
- if (ctx->Const.UniformBooleanTrue == 1) {
- fs_reg dst = fs_reg(this, glsl_type::uint_type);
- emit(XOR(dst, op[0], op[1]));
- inst = emit(AND(reg_null_d, dst, fs_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- } else {
- inst = emit(XOR(reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_binop_logic_or:
- if (ctx->Const.UniformBooleanTrue == 1) {
- fs_reg dst = fs_reg(this, glsl_type::uint_type);
- emit(OR(dst, op[0], op[1]));
- inst = emit(AND(reg_null_d, dst, fs_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- } else {
- inst = emit(OR(reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_binop_logic_and:
- if (ctx->Const.UniformBooleanTrue == 1) {
- fs_reg dst = fs_reg(this, glsl_type::uint_type);
- emit(AND(dst, op[0], op[1]));
- inst = emit(AND(reg_null_d, dst, fs_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- } else {
- inst = emit(AND(reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_unop_f2b:
- if (brw->gen >= 6) {
- emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
- } else {
- inst = emit(MOV(reg_null_f, op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_unop_i2b:
- if (brw->gen >= 6) {
- emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
- } else {
- inst = emit(MOV(reg_null_d, op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_binop_greater:
- case ir_binop_gequal:
- case ir_binop_less:
- case ir_binop_lequal:
- case ir_binop_equal:
- case ir_binop_all_equal:
- case ir_binop_nequal:
- case ir_binop_any_nequal:
- if (ctx->Const.UniformBooleanTrue == 1) {
- resolve_bool_comparison(expr->operands[0], &op[0]);
- resolve_bool_comparison(expr->operands[1], &op[1]);
- }
-
- emit(CMP(reg_null_d, op[0], op[1],
- brw_conditional_for_comparison(expr->operation)));
- break;
-
- case ir_triop_csel: {
- /* Expand the boolean condition into the flag register. */
- inst = emit(MOV(reg_null_d, op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
- /* Select which boolean to return. */
- fs_reg temp(this, expr->operands[1]->type);
- inst = emit(SEL(temp, op[1], op[2]));
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- /* Expand the result to a condition code. */
- inst = emit(MOV(reg_null_d, temp));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
- }
-
- default:
- unreachable("not reached");
- }
-}
-
-/**
- * Emit a gen6 IF statement with the comparison folded into the IF
- * instruction.
- */
-void
-fs_visitor::emit_if_gen6(ir_if *ir)
-{
- ir_expression *expr = ir->condition->as_expression();
-
- if (expr && expr->operation != ir_binop_ubo_load) {
- fs_reg op[3];
- fs_inst *inst;
- fs_reg temp;
-
- assert(expr->get_num_operands() <= 3);
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- assert(expr->operands[i]->type->is_scalar());
-
- expr->operands[i]->accept(this);
- op[i] = this->result;
- }
-
- switch (expr->operation) {
- case ir_unop_logic_not:
- emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_Z));
- return;
-
- case ir_binop_logic_xor:
- emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_logic_or:
- temp = fs_reg(this, glsl_type::bool_type);
- emit(OR(temp, op[0], op[1]));
- emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_logic_and:
- temp = fs_reg(this, glsl_type::bool_type);
- emit(AND(temp, op[0], op[1]));
- emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_unop_f2b:
- inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- return;
-
- case ir_unop_i2b:
- emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_greater:
- case ir_binop_gequal:
- case ir_binop_less:
- case ir_binop_lequal:
- case ir_binop_equal:
- case ir_binop_all_equal:
- case ir_binop_nequal:
- case ir_binop_any_nequal:
- if (ctx->Const.UniformBooleanTrue == 1) {
- resolve_bool_comparison(expr->operands[0], &op[0]);
- resolve_bool_comparison(expr->operands[1], &op[1]);
- }
-
- emit(IF(op[0], op[1],
- brw_conditional_for_comparison(expr->operation)));
- return;
-
- case ir_triop_csel: {
- /* Expand the boolean condition into the flag register. */
- fs_inst *inst = emit(MOV(reg_null_d, op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
- /* Select which boolean to use as the result. */
- fs_reg temp(this, expr->operands[1]->type);
- inst = emit(SEL(temp, op[1], op[2]));
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
- return;
- }
-
- default:
- unreachable("not reached");
- }
- }
-
- ir->condition->accept(this);
- emit(IF(this->result, fs_reg(0), BRW_CONDITIONAL_NZ));
-}
-
/**
* Try to replace IF/MOV/ELSE/MOV/ENDIF with SEL.
*
@@ -2558,19 +888,19 @@ fs_visitor::try_replace_with_sel()
if (src0.file == IMM) {
src0 = fs_reg(this, glsl_type::float_type);
src0.type = then_mov->src[0].type;
- emit(MOV(src0, then_mov->src[0]));
+ bld.MOV(src0, then_mov->src[0]);
}
fs_inst *sel;
if (if_inst->conditional_mod) {
/* Sandybridge-specific IF with embedded comparison */
- emit(CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
- if_inst->conditional_mod));
- sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
+ bld.CMP(bld.reg_null_d(), if_inst->src[0], if_inst->src[1],
+ if_inst->conditional_mod);
+ sel = bld.emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
sel->predicate = BRW_PREDICATE_NORMAL;
} else {
/* Separate CMP and IF instructions */
- sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
+ sel = bld.emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
sel->predicate = if_inst->predicate;
sel->predicate_inverse = if_inst->predicate_inverse;
}
@@ -2578,165 +908,6 @@ fs_visitor::try_replace_with_sel()
}
void
-fs_visitor::visit(ir_if *ir)
-{
- if (brw->gen < 6) {
- no16("Can't support (non-uniform) control flow on SIMD16\n");
- }
-
- /* Don't point the annotation at the if statement, because then it plus
- * the then and else blocks get printed.
- */
- this->base_ir = ir->condition;
-
- if (brw->gen == 6) {
- emit_if_gen6(ir);
- } else {
- emit_bool_to_cond_code(ir->condition);
-
- emit(IF(BRW_PREDICATE_NORMAL));
- }
-
- foreach_in_list(ir_instruction, ir_, &ir->then_instructions) {
- this->base_ir = ir_;
- ir_->accept(this);
- }
-
- if (!ir->else_instructions.is_empty()) {
- emit(BRW_OPCODE_ELSE);
-
- foreach_in_list(ir_instruction, ir_, &ir->else_instructions) {
- this->base_ir = ir_;
- ir_->accept(this);
- }
- }
-
- emit(BRW_OPCODE_ENDIF);
-
- try_replace_with_sel();
-}
-
-void
-fs_visitor::visit(ir_loop *ir)
-{
- if (brw->gen < 6) {
- no16("Can't support (non-uniform) control flow on SIMD16\n");
- }
-
- this->base_ir = NULL;
- emit(BRW_OPCODE_DO);
-
- foreach_in_list(ir_instruction, ir_, &ir->body_instructions) {
- this->base_ir = ir_;
- ir_->accept(this);
- }
-
- this->base_ir = NULL;
- emit(BRW_OPCODE_WHILE);
-}
-
-void
-fs_visitor::visit(ir_loop_jump *ir)
-{
- switch (ir->mode) {
- case ir_loop_jump::jump_break:
- emit(BRW_OPCODE_BREAK);
- break;
- case ir_loop_jump::jump_continue:
- emit(BRW_OPCODE_CONTINUE);
- break;
- }
-}
-
-void
-fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
-{
- ir_dereference *deref = static_cast<ir_dereference *>(
- ir->actual_parameters.get_head());
- ir_variable *location = deref->variable_referenced();
- unsigned surf_index = (stage_prog_data->binding_table.abo_start +
- location->data.binding);
-
- /* Calculate the surface offset */
- fs_reg offset(this, glsl_type::uint_type);
- ir_dereference_array *deref_array = deref->as_dereference_array();
-
- if (deref_array) {
- deref_array->array_index->accept(this);
-
- fs_reg tmp(this, glsl_type::uint_type);
- emit(MUL(tmp, this->result, fs_reg(ATOMIC_COUNTER_SIZE)));
- emit(ADD(offset, tmp, fs_reg(location->data.atomic.offset)));
- } else {
- offset = fs_reg(location->data.atomic.offset);
- }
-
- /* Emit the appropriate machine instruction */
- const char *callee = ir->callee->function_name();
- ir->return_deref->accept(this);
- fs_reg dst = this->result;
-
- if (!strcmp("__intrinsic_atomic_read", callee)) {
- emit_untyped_surface_read(surf_index, dst, offset);
-
- } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
- emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
- fs_reg(), fs_reg());
-
- } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
- emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
- fs_reg(), fs_reg());
- }
-}
-
-void
-fs_visitor::visit(ir_call *ir)
-{
- const char *callee = ir->callee->function_name();
-
- if (!strcmp("__intrinsic_atomic_read", callee) ||
- !strcmp("__intrinsic_atomic_increment", callee) ||
- !strcmp("__intrinsic_atomic_predecrement", callee)) {
- visit_atomic_counter_intrinsic(ir);
- } else {
- unreachable("Unsupported intrinsic.");
- }
-}
-
-void
-fs_visitor::visit(ir_return *)
-{
- unreachable("FINISHME");
-}
-
-void
-fs_visitor::visit(ir_function *ir)
-{
- /* Ignore function bodies other than main() -- we shouldn't see calls to
- * them since they should all be inlined before we get to ir_to_mesa.
- */
- if (strcmp(ir->name, "main") == 0) {
- const ir_function_signature *sig;
- exec_list empty;
-
- sig = ir->matching_signature(NULL, &empty, false);
-
- assert(sig);
-
- foreach_in_list(ir_instruction, ir_, &sig->body) {
- this->base_ir = ir_;
- ir_->accept(this);
- }
- }
-}
-
-void
-fs_visitor::visit(ir_function_signature *)
-{
- unreachable("not reached");
-}
-
-void
fs_visitor::visit(ir_emit_vertex *)
{
unreachable("not reached");
@@ -2748,129 +919,6 @@ fs_visitor::visit(ir_end_primitive *)
unreachable("not reached");
}
-void
-fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
- fs_reg dst, fs_reg offset, fs_reg src0,
- fs_reg src1)
-{
- bool uses_kill =
- (stage == MESA_SHADER_FRAGMENT) &&
- ((brw_wm_prog_data*) this->prog_data)->uses_kill;
- int reg_width = dispatch_width / 8;
- int length = 0;
-
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4);
-
- sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- /* Initialize the sample mask in the message header. */
- emit(MOV(sources[0], fs_reg(0u)))
- ->force_writemask_all = true;
-
- if (uses_kill) {
- emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1)))
- ->force_writemask_all = true;
- } else {
- emit(MOV(component(sources[0], 7),
- retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
- ->force_writemask_all = true;
- }
- length++;
-
- /* Set the atomic operation offset. */
- sources[1] = fs_reg(this, glsl_type::uint_type);
- emit(MOV(sources[1], offset));
- length++;
-
- /* Set the atomic operation arguments. */
- if (src0.file != BAD_FILE) {
- sources[length] = fs_reg(this, glsl_type::uint_type);
- emit(MOV(sources[length], src0));
- length++;
- }
-
- if (src1.file != BAD_FILE) {
- sources[length] = fs_reg(this, glsl_type::uint_type);
- emit(MOV(sources[length], src1));
- length++;
- }
-
- int mlen = 1 + (length - 1) * reg_width;
- fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
- BRW_REGISTER_TYPE_UD);
- emit(LOAD_PAYLOAD(src_payload, sources, length));
-
- /* Emit the instruction. */
- fs_inst *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload,
- fs_reg(atomic_op), fs_reg(surf_index));
- inst->mlen = mlen;
-}
-
-void
-fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
- fs_reg offset)
-{
- bool uses_kill =
- (stage == MESA_SHADER_FRAGMENT) &&
- ((brw_wm_prog_data*) this->prog_data)->uses_kill;
- int reg_width = dispatch_width / 8;
-
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
-
- sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- /* Initialize the sample mask in the message header. */
- emit(MOV(sources[0], fs_reg(0u)))
- ->force_writemask_all = true;
-
- if (uses_kill) {
- emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1)))
- ->force_writemask_all = true;
- } else {
- emit(MOV(component(sources[0], 7),
- retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
- ->force_writemask_all = true;
- }
-
- /* Set the surface read offset. */
- sources[1] = fs_reg(this, glsl_type::uint_type);
- emit(MOV(sources[1], offset));
-
- int mlen = 1 + reg_width;
- fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
- BRW_REGISTER_TYPE_UD);
- fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2));
-
- /* Emit the instruction. */
- inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload,
- fs_reg(surf_index));
- inst->mlen = mlen;
-}
-
-fs_inst *
-fs_visitor::emit(fs_inst *inst)
-{
- if (force_uncompressed_stack > 0)
- inst->exec_size = 8;
-
- if (dispatch_width == 16 && inst->exec_size == 8)
- inst->force_uncompressed = true;
-
- inst->annotation = this->current_annotation;
- inst->ir = this->base_ir;
-
- this->instructions.push_tail(inst);
-
- return inst;
-}
-
-void
-fs_visitor::emit(exec_list list)
-{
- foreach_in_list_safe(fs_inst, inst, &list) {
- inst->exec_node::remove();
- emit(inst);
- }
-}
-
/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
@@ -2878,13 +926,13 @@ fs_visitor::emit_dummy_fs()
int reg_width = dispatch_width / 8;
/* Everyone's favorite color. */
- emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)));
- emit(MOV(fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f)));
- emit(MOV(fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f)));
- emit(MOV(fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f)));
+ bld.MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f));
+ bld.MOV(fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f));
+ bld.MOV(fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f));
+ bld.MOV(fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f));
fs_inst *write;
- write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
+ write = bld.emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
write->base_mrf = 2;
write->mlen = 4 * reg_width;
write->eot = true;
@@ -2911,16 +959,16 @@ fs_visitor::interp_reg(int location, int channel)
void
fs_visitor::emit_interpolation_setup_gen4()
{
- this->current_annotation = "compute pixel centers";
+ bld.set_annotation("compute pixel centers");
this->pixel_x = fs_reg(this, glsl_type::uint_type);
this->pixel_y = fs_reg(this, glsl_type::uint_type);
this->pixel_x.type = BRW_REGISTER_TYPE_UW;
this->pixel_y.type = BRW_REGISTER_TYPE_UW;
- emit(FS_OPCODE_PIXEL_X, this->pixel_x);
- emit(FS_OPCODE_PIXEL_Y, this->pixel_y);
+ bld.emit(FS_OPCODE_PIXEL_X, this->pixel_x);
+ bld.emit(FS_OPCODE_PIXEL_Y, this->pixel_y);
- this->current_annotation = "compute pixel deltas from v0";
+ bld.set_annotation("compute pixel deltas from v0");
if (brw->has_pln) {
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
fs_reg(this, glsl_type::vec2_type);
@@ -2932,24 +980,24 @@ fs_visitor::emit_interpolation_setup_gen4()
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
fs_reg(this, glsl_type::float_type);
}
- emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
- this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))));
- emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
- this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))));
+ bld.ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
+ this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))));
+ bld.ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
+ this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));
- this->current_annotation = "compute pos.w and 1/pos.w";
+ bld.set_annotation("compute pos.w and 1/pos.w");
/* Compute wpos.w. It's always in our setup, since it's needed to
* interpolate the other attributes.
*/
this->wpos_w = fs_reg(this, glsl_type::float_type);
- emit(FS_OPCODE_LINTERP, wpos_w,
+ bld.emit(FS_OPCODE_LINTERP, wpos_w,
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
interp_reg(VARYING_SLOT_POS, 3));
/* Compute the pixel 1/W value from wpos.w. */
this->pixel_w = fs_reg(this, glsl_type::float_type);
- emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
- this->current_annotation = NULL;
+ bld.emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
+ bld.set_annotation(NULL);
}
/** Emits the interpolation for the varying inputs. */
@@ -2959,17 +1007,17 @@ fs_visitor::emit_interpolation_setup_gen6()
struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
/* If the pixel centers end up used, the setup is the same as for gen4. */
- this->current_annotation = "compute pixel centers";
+ bld.set_annotation("compute pixel centers");
fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
int_pixel_x.type = BRW_REGISTER_TYPE_UW;
int_pixel_y.type = BRW_REGISTER_TYPE_UW;
- emit(ADD(int_pixel_x,
+ bld.ADD(int_pixel_x,
fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
- fs_reg(brw_imm_v(0x10101010))));
- emit(ADD(int_pixel_y,
+ fs_reg(brw_imm_v(0x10101010)));
+ bld.ADD(int_pixel_y,
fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
- fs_reg(brw_imm_v(0x11001100))));
+ fs_reg(brw_imm_v(0x11001100)));
/* As of gen6, we can no longer mix float and int sources. We have
* to turn the integer pixel centers into floats for their actual
@@ -2977,13 +1025,13 @@ fs_visitor::emit_interpolation_setup_gen6()
*/
this->pixel_x = fs_reg(this, glsl_type::float_type);
this->pixel_y = fs_reg(this, glsl_type::float_type);
- emit(MOV(this->pixel_x, int_pixel_x));
- emit(MOV(this->pixel_y, int_pixel_y));
+ bld.MOV(this->pixel_x, int_pixel_x);
+ bld.MOV(this->pixel_y, int_pixel_y);
- this->current_annotation = "compute pos.w";
+ bld.set_annotation("compute pos.w");
this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0));
this->wpos_w = fs_reg(this, glsl_type::float_type);
- emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
+ bld.emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
uint8_t reg = payload.barycentric_coord_reg[i];
@@ -2991,7 +1039,7 @@ fs_visitor::emit_interpolation_setup_gen6()
this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
}
- this->current_annotation = NULL;
+ bld.set_annotation(NULL);
}
int
@@ -3035,7 +1083,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
if (colors_enabled & (1 << i)) {
dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8),
color.type, color.width);
- inst = emit(MOV(dst[len], offset(color, i)));
+ inst = bld.MOV(dst[len], offset(color, i));
inst->saturate = key->clamp_fragment_color;
} else if (color.width == 16) {
/* We need two BAD_FILE slots for a 16-wide color */
@@ -3058,11 +1106,11 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
for (unsigned i = 0; i < 4; ++i) {
if (colors_enabled & (1 << i)) {
dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
- inst = emit(MOV(dst[i], half(offset(color, i), 0)));
+ inst = bld.MOV(dst[i], half(offset(color, i), 0));
inst->saturate = key->clamp_fragment_color;
dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type);
- inst = emit(MOV(dst[i + 4], half(offset(color, i), 1)));
+ inst = bld.MOV(dst[i + 4], half(offset(color, i), 1));
inst->saturate = key->clamp_fragment_color;
inst->force_sechalf = true;
}
@@ -3101,7 +1149,7 @@ fs_visitor::emit_alpha_test()
{
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- this->current_annotation = "Alpha test";
+ bld.set_annotation("Alpha test");
fs_inst *cmp;
if (key->alpha_test_func == GL_ALWAYS)
@@ -3111,15 +1159,15 @@ fs_visitor::emit_alpha_test()
/* f0.1 = 0 */
fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UW));
- cmp = emit(CMP(reg_null_f, some_reg, some_reg,
- BRW_CONDITIONAL_NEQ));
+ cmp = bld.CMP(bld.reg_null_f(), some_reg, some_reg,
+ BRW_CONDITIONAL_NEQ);
} else {
/* RT0 alpha */
fs_reg color = offset(outputs[0], 3);
/* f0.1 &= func(color, ref) */
- cmp = emit(CMP(reg_null_f, color, fs_reg(key->alpha_test_ref),
- cond_for_alpha_func(key->alpha_test_func)));
+ cmp = bld.CMP(bld.reg_null_f(), color, fs_reg(key->alpha_test_ref),
+ cond_for_alpha_func(key->alpha_test_func));
}
cmp->predicate = BRW_PREDICATE_NORMAL;
cmp->flag_subreg = 1;
@@ -3133,7 +1181,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- this->current_annotation = "FB write header";
+ bld.set_annotation("FB write header");
bool header_present = true;
int reg_size = dispatch_width / 8;
@@ -3163,22 +1211,22 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
if (payload.aa_dest_stencil_reg) {
sources[length] = fs_reg(GRF, alloc.allocate(1));
- emit(MOV(sources[length],
- fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))));
+ bld.MOV(sources[length],
+ fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
length++;
}
prog_data->uses_omask =
prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
if (prog_data->uses_omask) {
- this->current_annotation = "FB write oMask";
+ bld.set_annotation("FB write oMask");
assert(this->sample_mask.file != BAD_FILE);
/* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since
        * it's unsigned single words, one vgrf is always 16-wide.
*/
sources[length] = fs_reg(GRF, alloc.allocate(1),
BRW_REGISTER_TYPE_UW, 16);
- emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
+ bld.emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
length++;
}
@@ -3192,7 +1240,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
if (src0_alpha.file != BAD_FILE) {
sources[length] = fs_reg(GRF, alloc.allocate(reg_size),
src0_alpha.type, src0_alpha.width);
- fs_inst *inst = emit(MOV(sources[length], src0_alpha));
+ fs_inst *inst = bld.MOV(sources[length], src0_alpha);
inst->saturate = key->clamp_fragment_color;
length++;
}
@@ -3217,19 +1265,19 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth.file != BAD_FILE);
- emit(MOV(sources[length], this->frag_depth));
+ bld.MOV(sources[length], this->frag_depth);
} else {
/* Pass through the payload depth. */
- emit(MOV(sources[length],
- fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
+ bld.MOV(sources[length],
+ fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)));
}
length++;
}
if (payload.dest_depth_reg) {
sources[length] = fs_reg(this, glsl_type::float_type);
- emit(MOV(sources[length],
- fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0))));
+ bld.MOV(sources[length],
+ fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)));
length++;
}
@@ -3238,16 +1286,16 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
if (brw->gen >= 7) {
/* Send from the GRF */
fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
- load = emit(LOAD_PAYLOAD(payload, sources, length));
+ load = bld.LOAD_PAYLOAD(payload, sources, length);
payload.reg = alloc.allocate(load->regs_written);
load->dst = payload;
- write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
+ write = bld.emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
write->base_mrf = -1;
} else {
/* Send from the MRF */
- load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F),
- sources, length));
- write = emit(FS_OPCODE_FB_WRITE);
+ load = bld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F),
+ sources, length);
+ write = bld.emit(FS_OPCODE_FB_WRITE);
write->base_mrf = 1;
}
@@ -3278,17 +1326,17 @@ fs_visitor::emit_fb_writes()
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
- this->current_annotation = ralloc_asprintf(this->mem_ctx,
- "FB dual-source write");
+ bld.set_annotation(ralloc_asprintf(this->mem_ctx,
+ "FB dual-source write"));
inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,
reg_undef, 4);
inst->target = 0;
prog_data->dual_src_blend = true;
} else if (key->nr_color_regions > 0) {
for (int target = 0; target < key->nr_color_regions; target++) {
- this->current_annotation = ralloc_asprintf(this->mem_ctx,
+ bld.set_annotation(ralloc_asprintf(this->mem_ctx,
"FB write target %d",
- target);
+ target));
fs_reg src0_alpha;
if (brw->gen >= 6 && key->replicate_alpha && target != 0)
src0_alpha = offset(outputs[0], 3);
@@ -3315,32 +1363,7 @@ fs_visitor::emit_fb_writes()
}
inst->eot = true;
- this->current_annotation = NULL;
-}
-
-void
-fs_visitor::resolve_ud_negate(fs_reg *reg)
-{
- if (reg->type != BRW_REGISTER_TYPE_UD ||
- !reg->negate)
- return;
-
- fs_reg temp = fs_reg(this, glsl_type::uint_type);
- emit(MOV(temp, *reg));
- *reg = temp;
-}
-
-void
-fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
-{
- assert(ctx->Const.UniformBooleanTrue == 1);
-
- if (rvalue->type != glsl_type::bool_type)
- return;
-
- fs_reg temp = fs_reg(this, glsl_type::bool_type);
- emit(AND(temp, *reg, fs_reg(1)));
- *reg = temp;
+ bld.set_annotation(NULL);
}
fs_visitor::fs_visitor(struct brw_context *brw,
@@ -3350,59 +1373,53 @@ fs_visitor::fs_visitor(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_fragment_program *fp,
unsigned dispatch_width)
- : backend_visitor(brw, shader_prog, &fp->Base, &prog_data->base,
- MESA_SHADER_FRAGMENT),
- reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)),
- reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)),
- reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)),
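+     /* The null-register convenience values and instruction annotations
+      * formerly stored on fs_visitor are now provided by the builder
+      * (bld.reg_null_f(), bld.set_annotation()).
+      */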
+ : backend_visitor(brw, shader_prog, &fp->Base, &prog_data->base, mem_ctx,
+ MESA_SHADER_FRAGMENT, INTEL_DEBUG & DEBUG_WM,
+ prog_data->uses_kill,
+ brw::fs_builder(brw, mem_ctx, alloc, instructions,
+ dispatch_width),
+ (dispatch_width == 16 ? ST_FS16 : ST_FS8),
+ prog_data->base.nr_params),
key(key), prog_data(&prog_data->base),
dispatch_width(dispatch_width)
{
- this->mem_ctx = mem_ctx;
init();
}
void
fs_visitor::init()
{
- this->failed = false;
this->simd16_unsupported = false;
this->no16_msg = NULL;
- this->variable_ht = hash_table_ctor(0,
- hash_table_pointer_hash,
- hash_table_pointer_compare);
memset(&this->payload, 0, sizeof(this->payload));
memset(this->outputs, 0, sizeof(this->outputs));
memset(this->output_components, 0, sizeof(this->output_components));
this->source_depth_to_render_target = false;
this->runtime_check_aads_emit = false;
- this->first_non_payload_grf = 0;
- this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
-
- this->current_annotation = NULL;
- this->base_ir = NULL;
- this->virtual_grf_start = NULL;
- this->virtual_grf_end = NULL;
this->live_intervals = NULL;
this->regs_live_at_ip = NULL;
- this->uniforms = 0;
this->last_scratch = 0;
this->pull_constant_loc = NULL;
this->push_constant_loc = NULL;
- this->force_uncompressed_stack = 0;
-
this->spilled_any_registers = false;
this->do_dual_src = false;
-
- if (dispatch_width == 8)
- this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
}
-fs_visitor::~fs_visitor()
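+/**
+ * Construct the common header for untyped atomic and untyped surface read
+ * messages, factoring out the sample-mask setup that emit_untyped_atomic()
+ * and emit_untyped_surface_read() previously duplicated.
+ */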
+fs_reg
+fs_visitor::emit_untyped_surface_header()
{
- hash_table_dtor(this->variable_ht);
+ using namespace brw;
+ const fs_reg payload = half(bld.natural_reg(BRW_REGISTER_TYPE_UD), 0);
+ const fs_reg sample_mask =
+ (uses_kill ? brw_flag_reg(0, 1) :
+ retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
+
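+   /* Zero the header, then write the live sample mask into dword 7: the
+    * discard flag register if the shader uses KILL, otherwise the dispatch
+    * mask in g1.7.  exec_all()/exec_half() are assumed to set
+    * force_writemask_all and limit execution to one half, as the removed
+    * open-coded versions did.
+    */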
+ exec_all(exec_half(0, bld.MOV(payload, fs_reg(0u))));
+ exec_all(bld.MOV(component(payload, 7), sample_mask));
+
+ return payload;
}
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index daedb35a88c..31582635056 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -47,7 +47,6 @@ public:
fs_reg(fs_visitor *v, const struct glsl_type *type);
bool equals(const fs_reg &r) const;
- bool is_valid_3src() const;
bool is_contiguous() const;
fs_reg &apply_stride(unsigned stride);
@@ -82,6 +81,18 @@ public:
uint8_t stride;
};
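+/* Compile-time register metadata for the code shared between visitors: FS
+ * virtual registers take a single allocation unit per scalar value and
+ * support neither swizzles nor writemasks, unlike their VEC4 counterparts.
+ */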
+namespace brw {
+ template<>
+ struct reg_traits<fs_reg> {
+ typedef fs_reg src_reg;
+ typedef fs_reg dst_reg;
+
+ static const unsigned alloc_size = 1;
+ static const bool allows_swizzle = false;
+ static const bool allows_writemask = false;
+ };
+}
+
static inline fs_reg
byte_offset(fs_reg reg, unsigned delta)
{
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 2d5610b712d..cd495e8cb5f 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -30,7 +30,6 @@
namespace brw {
-class vec4_visitor;
class dst_reg;
class src_reg : public backend_reg
diff --git a/src/mesa/drivers/dri/i965/brw_ir_visitor.cpp b/src/mesa/drivers/dri/i965/brw_ir_visitor.cpp
new file mode 100644
index 00000000000..3e67aeda0af
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_ir_visitor.cpp
@@ -0,0 +1,190 @@
+/*
+ * Copyright © 2010-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_ir_visitor.h"
+#include "brw_cfg.h"
+
+using namespace brw;
+
+base_visitor::base_visitor(struct brw_context *brw,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ void *mem_ctx,
+ gl_shader_stage stage,
+ bool debug_flag,
+ unsigned uniform_array_size)
+ : brw(brw),
+ ctx(&brw->ctx),
+ shader(shader_prog ?
+ (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
+ shader_prog(shader_prog),
+ prog(prog),
+ stage_prog_data(stage_prog_data),
+ mem_ctx(mem_ctx),
+ cfg(NULL),
+ stage(stage),
+ fail_msg(NULL),
+ debug_flag(debug_flag),
+ failed(false),
+ first_non_payload_grf(0),
+ max_grf(brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF),
+ virtual_grf_start(NULL),
+ virtual_grf_end(NULL),
+ uniforms(0),
+ uniform_size(rzalloc_array(mem_ctx, int, uniform_array_size)),
+ uniform_vector_size(rzalloc_array(mem_ctx, int, uniform_array_size)),
+ uniform_array_size(uniform_array_size)
+{
+}
+
+void
+base_visitor::dump_instructions()
+{
+ dump_instructions(NULL);
+}
+
+void
+base_visitor::dump_instructions(const char *name)
+{
+ FILE *file = stderr;
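+   /* Avoid creating dump files when running with elevated privileges; fall
+    * back to stderr instead.
+    */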
+ if (name && geteuid() != 0) {
+ file = fopen(name, "w");
+ if (!file)
+ file = stderr;
+ }
+
+ int ip = 0;
+ foreach_block_and_inst(block, backend_instruction, inst, cfg) {
+ if (!name)
+ fprintf(stderr, "%d: ", ip++);
+ dump_instruction(inst, file);
+ }
+
+ if (file != stderr) {
+ fclose(file);
+ }
+}
+
+void
+base_visitor::calculate_cfg()
+{
+ if (this->cfg)
+ return;
+ cfg = new(mem_ctx) cfg_t(&this->instructions);
+}
+
+void
+base_visitor::invalidate_cfg()
+{
+ ralloc_free(this->cfg);
+ this->cfg = NULL;
+}
+
+/**
+ * Sets up the starting offsets for the groups of binding table entries
+ * common to all pipeline stages.
+ *
+ * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
+ * unused but also to make sure that adding small offsets to them will
+ * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
+ */
+void
+base_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
+{
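+   /* _mesa_fls() finds the last (most significant) set bit, so this counts
+    * sampler slots up to and including the highest one in use.
+    */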
+ int num_textures = _mesa_fls(prog->SamplersUsed);
+
+ stage_prog_data->binding_table.texture_start = next_binding_table_offset;
+ next_binding_table_offset += num_textures;
+
+ if (shader) {
+ stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
+ next_binding_table_offset += shader->base.NumUniformBlocks;
+ } else {
+ stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
+ }
+
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+ stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
+ next_binding_table_offset++;
+ } else {
+ stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
+ }
+
+ if (prog->UsesGather) {
+ if (brw->gen >= 8) {
+ stage_prog_data->binding_table.gather_texture_start =
+ stage_prog_data->binding_table.texture_start;
+ } else {
+ stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
+ next_binding_table_offset += num_textures;
+ }
+ } else {
+ stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
+ }
+
+ if (shader_prog && shader_prog->NumAtomicBuffers) {
+ stage_prog_data->binding_table.abo_start = next_binding_table_offset;
+ next_binding_table_offset += shader_prog->NumAtomicBuffers;
+ } else {
+ stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
+ }
+
+ /* This may or may not be used depending on how the compile goes. */
+ stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
+ next_binding_table_offset++;
+
+ assert(next_binding_table_offset <= BRW_MAX_SURFACES);
+
+ /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
+}
+
+void
+base_visitor::vfail(const char *format, va_list va)
+{
+ char *msg;
+
+ if (failed)
+ return;
+
+ failed = true;
+
+ msg = ralloc_vasprintf(mem_ctx, format, va);
+ msg = ralloc_asprintf(mem_ctx, "compile failed: %s\n", msg);
+
+ this->fail_msg = msg;
+
+ if (debug_flag) {
+ fprintf(stderr, "%s", msg);
+ }
+}
+
+void
+base_visitor::fail(const char *format, ...)
+{
+ va_list va;
+
+ va_start(va, format);
+ vfail(format, va);
+ va_end(va);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_ir_visitor.h b/src/mesa/drivers/dri/i965/brw_ir_visitor.h
new file mode 100644
index 00000000000..876f162b91e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_ir_visitor.h
@@ -0,0 +1,2353 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_IR_VISITOR_H
+#define BRW_IR_VISITOR_H
+
+#include "brw_ir_builder.h"
+#include "brw_program.h"
+#include "program/hash_table.h"
+#include "glsl/ir_uniform.h"
+
+extern "C" {
+#include "program/sampler.h"
+}
+
+namespace brw {
+
+class base_visitor : public ir_visitor {
+protected:
+ base_visitor(struct brw_context *brw,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ void *mem_ctx,
+ gl_shader_stage stage,
+ bool debug_flag,
+ unsigned uniform_array_size);
+
+public:
+ struct brw_context *const brw;
+ struct gl_context *const ctx;
+ struct brw_shader *const shader;
+ struct gl_shader_program *const shader_prog;
+ struct gl_program *const prog;
+ struct brw_stage_prog_data *const stage_prog_data;
+
+ /** ralloc context for temporary data used during compile */
+ void *mem_ctx;
+
+ /**
+ * List of either fs_inst or vec4_instruction (inheriting from
+ * backend_instruction)
+ */
+ exec_list instructions;
+
+ cfg_t *cfg;
+
+ gl_shader_stage stage;
+
+ virtual void dump_instruction(backend_instruction *inst) = 0;
+ virtual void dump_instruction(backend_instruction *inst, FILE *file) = 0;
+ virtual void dump_instructions();
+ virtual void dump_instructions(const char *name);
+
+ void calculate_cfg();
+ void invalidate_cfg();
+
+ void assign_common_binding_table_offsets(uint32_t next_binding_table_offset);
+
+ virtual void invalidate_live_intervals() = 0;
+
+ void vfail(const char *msg, va_list args);
+ void fail(const char *msg, ...);
+
+ char *fail_msg;
+ bool debug_flag;
+ bool failed;
+
+ int first_non_payload_grf;
+ /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
+ unsigned max_grf;
+ int *virtual_grf_start;
+ int *virtual_grf_end;
+
+ /** Number of uniform variable components visited. */
+ unsigned uniforms;
+ int *uniform_size;
+ int *uniform_vector_size;
+   unsigned uniform_array_size; /**< Size of uniform_[vector_]size arrays */
+};
+
+template<typename V, typename B>
+class backend_visitor : public base_visitor {
+protected:
+ typedef typename B::src_reg src_reg;
+ typedef typename B::dst_reg dst_reg;
+ typedef typename B::vector_builder::src_reg src_vector;
+ typedef typename B::vector_builder::dst_reg dst_vector;
+ typedef typename B::instruction instruction;
+
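+   /* Downcast to the derived visitor type (the curiously recurring template
+    * pattern), used to call stage-specific methods such as temporary_reg()
+    * or emit_untyped_surface_header() without going through a vtable.
+    */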
+ V &
+ self() {
+ return static_cast<V &>(*this);
+ }
+
+ backend_visitor(struct brw_context *brw,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ void *mem_ctx,
+ gl_shader_stage stage,
+ bool debug_flag,
+ bool uses_kill,
+ const B &bld,
+ shader_time_shader_type st_type,
+ unsigned uniform_array_size) :
+ base_visitor(brw, shader_prog, prog, stage_prog_data, mem_ctx, stage,
+ debug_flag, uniform_array_size),
+ variable_ht(hash_table_ctor(0,
+ hash_table_pointer_hash,
+ hash_table_pointer_compare)),
+ bld(bld), uses_kill(uses_kill), st_type(st_type)
+ {
+ }
+
+ ~backend_visitor()
+ {
+ hash_table_dtor(this->variable_ht);
+ }
+
+ src_reg
+ visit_result(ir_instruction *ir)
+ {
+ ir->accept(this);
+ assert(this->result.file != BAD_FILE);
+ return this->result;
+ }
+
+ unsigned
+ emit_constant_values(const dst_reg &dst, ir_constant *ir)
+ {
+ unsigned size = 0;
+
+ if (ir->type->is_record()) {
+ foreach_in_list(ir_constant, field_value, &ir->components)
+ size += emit_constant_values(offset(dst, size), field_value);
+
+ } else if (ir->type->is_array()) {
+ for (unsigned i = 0; i < ir->type->length; i++)
+ size += emit_constant_values(offset(dst, size),
+ ir->array_elements[i]);
+
+ } else {
+ const int n = ir->type->vector_elements;
+ typename B::vector_builder vbld = bld.vector();
+
+ for (int j = 0; j < ir->type->matrix_columns; j++) {
+ dst_vector tmp = retype(offset(dst_vector_n(dst, 4), size),
+ brw_type_for_base_type(ir->type));
+ unsigned mask = (1 << n) - 1;
+
+ while (mask) {
+ const int i = ffs(mask) - 1;
+
+ tmp.writemask = 1 << i;
+
+ /* Find other components that match the one we're about to
+ * write. Emits fewer instructions for things like vec4(0.5,
+ * 1.5, 1.5, 1.5).
+ */
+ for (int k = i + 1; k < n; k++) {
+ if (ir->type->base_type == GLSL_TYPE_BOOL) {
+ if (ir->value.b[j * n + i] == ir->value.b[j * n + k])
+ tmp.writemask |= 1 << k;
+ } else {
+ /* u, i, and f storage all line up, so no need for a
+ * switch case for comparing each type.
+ */
+ if (ir->value.u[j * n + i] == ir->value.u[j * n + k])
+ tmp.writemask |= 1 << k;
+ }
+ }
+
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ vbld.MOV(tmp, src_reg(ir->value.f[j * n + i]));
+ break;
+ case GLSL_TYPE_INT:
+ vbld.MOV(tmp, src_reg(ir->value.i[j * n + i]));
+ break;
+ case GLSL_TYPE_UINT:
+ vbld.MOV(tmp, src_reg(ir->value.u[j * n + i]));
+ break;
+ case GLSL_TYPE_BOOL:
+ vbld.MOV(tmp, src_reg(ir->value.b[j * n + i] ?
+ ctx->Const.UniformBooleanTrue : 0));
+ break;
+ default:
+ unreachable("Non-float/uint/int/bool constant");
+ }
+
+ mask &= ~tmp.writemask;
+ }
+
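+         /* Advance one vec4 slot per column in the vec4 backend
+          * (alloc_size == 4) or one slot per component in the scalar
+          * backend (alloc_size == 1).
+          */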
+ size += CEILING(n, alloc_size);
+ }
+ }
+
+ return size;
+ }
+
+ void
+ visit(ir_constant *ir)
+ {
+ dst_reg dst = self().temporary_reg(ir->type);
+ emit_constant_values(dst, ir);
+ this->result = src_reg(dst);
+ }
+
+ dst_reg *
+ variable_storage(ir_variable *var)
+ {
+ return (dst_reg *)hash_table_find(this->variable_ht, var);
+ }
+
+ /* Our support for builtin uniforms is even scarier than non-builtin.
+ * It sits on top of the PROG_STATE_VAR parameters that are
+ * automatically updated from GL context state.
+ */
+ void
+ setup_builtin_uniform_values(ir_variable *ir)
+ {
+ const ir_state_slot *const slots = ir->get_state_slots();
+
+ for (unsigned i = 0; i < ir->get_num_state_slots(); i++) {
+      /* This state reference has already been set up by ir_to_mesa, but we'll
+ * get the same index back here.
+ */
+ int index = _mesa_add_state_reference(this->prog->Parameters,
+ (gl_state_index *)slots[i].tokens);
+ gl_constant_value *values = prog->Parameters->ParameterValues[index];
+ const unsigned n = size_for_swizzle(
+ from_glsl_swizzle(WRITEMASK_XYZW, slots[i].swizzle));
+
+ /* Add each of the unique swizzles of the element as a parameter.
+ * This'll end up matching the expected layout of the
+ * array/matrix/structure we're trying to fill in.
+ */
+ for (unsigned j = 0; j < MAX2(n, alloc_size); j++)
+ stage_prog_data->param[uniforms * alloc_size + j] =
+ &values[GET_SWZ(slots[i].swizzle, j)];
+
+ uniform_vector_size[uniforms] = n;
+ uniforms += CEILING(n, alloc_size);
+ }
+ }
+
+ /* Our support for uniforms is piggy-backed on the struct
+ * gl_fragment_program, because that's where the values actually
+ * get stored, rather than in some global gl_shader_program uniform
+ * store.
+ */
+ void
+ setup_uniform_values(ir_variable *ir)
+ {
+ int namelen = strlen(ir->name);
+
+ /* The data for our (non-builtin) uniforms is stored in a series of
+ * gl_uniform_driver_storage structs for each subcomponent that
+ * glGetUniformLocation() could name. We know it's been set up in the same
+ * order we'd walk the type, so walk the list of storage and find anything
+ * with our name, or the prefix of a component that starts with our name.
+ */
+ for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
+ struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
+
+ if (strncmp(ir->name, storage->name, namelen) != 0 ||
+ (storage->name[namelen] != 0 &&
+ storage->name[namelen] != '.' &&
+ storage->name[namelen] != '[')) {
+ continue;
+ }
+
+ gl_constant_value *components = storage->storage;
+ unsigned vector_count = (MAX2(storage->array_elements, 1) *
+ storage->type->matrix_columns);
+
+ for (unsigned s = 0; s < vector_count; s++) {
+ unsigned i;
+ assert(uniforms < uniform_array_size);
+
+ for (i = 0; i < storage->type->vector_elements; i++) {
+ stage_prog_data->param[uniforms * alloc_size + i] =
+ &components[s * storage->type->vector_elements + i];
+ }
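+         /* In the vec4 backend, pad out the rest of the slot with zeros so
+          * the parameter layout stays regular.
+          */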
+ for (; i < alloc_size; i++) {
+ static const gl_constant_value zero = { 0.0 };
+ stage_prog_data->param[uniforms * alloc_size + i] = &zero;
+ }
+
+ uniform_vector_size[uniforms] = storage->type->vector_elements;
+ uniforms += CEILING(storage->type->vector_elements, alloc_size);
+ }
+ }
+ }
+
+ unsigned
+ type_vector_size(const struct glsl_type *type)
+ {
+ if (type->is_scalar() || type->is_vector() || type->is_matrix())
+ return type->vector_elements;
+ else
+ return 4;
+ }
+
+ void
+ visit(ir_variable *ir)
+ {
+ dst_reg *reg = NULL;
+
+ if (variable_storage(ir))
+ return;
+
+ if (ir->data.mode == ir_var_auto ||
+ ir->data.mode == ir_var_temporary) {
+ reg = new(mem_ctx) dst_reg(self().temporary_reg(ir->type));
+
+ } else if (ir->data.mode == ir_var_uniform) {
+ /* Thanks to the lower_ubo_reference pass, we will see only
+ * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
+ * variables, so no need for them to be in variable_ht.
+ *
+ * Some uniforms, such as samplers and atomic counters, have no actual
+ * storage, so we should ignore them.
+ */
+ if (ir->is_in_uniform_block() || type_size(ir->type) == 0)
+ return;
+
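+         /* The uniform layout is set up once during the SIMD8 compile and
+          * reused by SIMD16, so every uniform should already have been seen
+          * by the time the SIMD16 program is visited.
+          */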
+ if (bld.dispatch_width() == 16) {
+ fail("Failed to find uniform '%s' in SIMD16\n", ir->name);
+ return;
+ }
+
+ reg = new(mem_ctx) dst_reg(
+ resize(retype(dst_reg(UNIFORM, this->uniforms),
+ brw_type_for_base_type(ir->type)),
+ type_vector_size(ir->type)));
+
+ /* Track how big the whole uniform variable is, in case we need to put a
+ * copy of its data into pull constants for array access.
+ */
+ assert(this->uniforms < uniform_array_size);
+ this->uniform_size[this->uniforms] = type_size(ir->type);
+
+ if (!strncmp(ir->name, "gl_", 3)) {
+ setup_builtin_uniform_values(ir);
+ } else {
+ setup_uniform_values(ir);
+ }
+
+ } else {
+ unreachable("not reached");
+ }
+
+ hash_table_insert(this->variable_ht, reg, ir);
+ }
+
+ /** Walks an exec_list of ir_instruction and sends it through this visitor. */
+ void
+ visit_instructions(const exec_list *list)
+ {
+ foreach_in_list(ir_instruction, ir, list) {
+ bld.set_base_ir(ir);
+ ir->accept(this);
+ }
+ }
+
+ void
+ resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)
+ {
+ assert(ctx->Const.UniformBooleanTrue == 1);
+
+ if (rvalue->type != glsl_type::bool_type)
+ return;
+
+ dst_reg temp = bld.natural_reg(BRW_REGISTER_TYPE_D);
+ bld.AND(temp, *reg, src_reg(1));
+ *reg = src_reg(temp);
+ }
+
+ void
+ visit(ir_dereference_variable *ir)
+ {
+ dst_reg *reg = variable_storage(ir->var);
+
+ if (!reg) {
+ fail("Failed to find variable storage for %s\n", ir->var->name);
+ this->result = src_reg(bld.reg_null_d());
+ return;
+ }
+
+ this->result = resize(src_reg(*reg), type_vector_size(ir->type));
+ }
+
+ void
+ visit(ir_dereference_record *ir)
+ {
+ const glsl_type *struct_type = ir->record->type;
+ unsigned off = 0;
+
+ ir->record->accept(this);
+
+ for (unsigned i = 0; i < struct_type->length; i++) {
+ if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
+ break;
+ off += type_size(struct_type->fields.structure[i].type);
+ }
+
+ this->result = retype(resize(offset(this->result, off),
+ type_vector_size(ir->type)),
+ brw_type_for_base_type(ir->type));
+ }
+
+ virtual unsigned
+ get_array_stride(ir_dereference_array *ir)
+ {
+ /* Under normal circumstances array elements are stored consecutively, so
+ * the stride is equal to the size of the array element.
+ */
+ return type_size(ir->type);
+ }
+
+ void
+ visit(ir_dereference_array *ir)
+ {
+ ir_constant *constant_index = ir->array_index->constant_expression_value();
+ src_reg src = retype(visit_result(ir->array),
+ brw_type_for_base_type(ir->type));
+
+ if (constant_index) {
+ src = offset(src, constant_index->value.i[0] * get_array_stride(ir));
+ } else {
+ /* Variable index array dereference. We attach the variable index
+ * component to the reg as a pointer to a register containing the
+ * offset. Currently only uniform arrays are supported in this
+ * patch, and that reladdr pointer is resolved by
+ * move_uniform_array_access_to_pull_constants(). All other array
+ * types are lowered by lower_variable_index_to_cond_assign().
+ */
+ src_reg index_reg = visit_result(ir->array_index);
+
+ if (get_array_stride(ir) != 1) {
+ dst_reg tmp = bld.scalar_reg(BRW_REGISTER_TYPE_D);
+ bld.MUL(tmp, index_reg, src_reg(get_array_stride(ir)));
+ index_reg = src_reg(tmp);
+ }
+
+ if (src.reladdr) {
+ dst_reg tmp = bld.scalar_reg(BRW_REGISTER_TYPE_D);
+ bld.ADD(tmp, index_reg, *src.reladdr);
+ index_reg = src_reg(tmp);
+ }
+
+ src.reladdr = new(mem_ctx) src_reg(index_reg);
+ }
+
+ /* If the type is smaller than a vec4, replicate the last channel out. */
+ this->result = resize(src, type_vector_size(ir->type));
+ }
+
+ /**
+ * Emit a gen6 IF statement with the comparison folded into the IF
+ * instruction.
+ */
+ void
+ emit_if_gen6(ir_if *ir)
+ {
+ ir_expression *expr = ir->condition->as_expression();
+
+ if (expr && expr->operation != ir_binop_ubo_load) {
+ bool is_scalar = true;
+ src_reg op[3];
+
+ assert(expr->get_num_operands() <= 3);
+ for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ is_scalar &= expr->operands[i]->type->is_scalar();
+ op[i] = visit_result(expr->operands[i]);
+ }
+
+ switch (expr->operation) {
+ case ir_unop_logic_not:
+ bld.IF(op[0], src_reg(0), BRW_CONDITIONAL_Z);
+ return;
+
+ case ir_binop_logic_xor:
+ bld.IF(op[0], op[1], BRW_CONDITIONAL_NZ);
+ return;
+
+ case ir_binop_logic_or: {
+ dst_reg temp = bld.scalar_reg(BRW_REGISTER_TYPE_D);
+ bld.OR(temp, op[0], op[1]);
+ bld.IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ);
+ return;
+ }
+ case ir_binop_logic_and: {
+ dst_reg temp = bld.scalar_reg(BRW_REGISTER_TYPE_D);
+ bld.AND(temp, op[0], op[1]);
+ bld.IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ);
+ return;
+ }
+ case ir_unop_f2b:
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.emit(BRW_OPCODE_IF, bld.reg_null_f(),
+ op[0], src_reg(0)));
+ return;
+
+ case ir_unop_i2b:
+ bld.IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ);
+ return;
+
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_equal:
+ case ir_binop_nequal:
+ case ir_binop_all_equal:
+ case ir_binop_any_nequal:
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ resolve_bool_comparison(expr->operands[0], &op[0]);
+ resolve_bool_comparison(expr->operands[1], &op[1]);
+ }
+
+ if (is_scalar) {
+ bld.IF(op[0], op[1],
+ brw_conditional_for_comparison(expr->operation));
+ } else {
+ bld.CMP(bld.reg_null_d(), op[0], op[1],
+ brw_conditional_for_comparison(expr->operation));
+ bld.IF(expr->operation == ir_binop_all_equal ?
+ BRW_PREDICATE_ALIGN16_ALL4H :
+ BRW_PREDICATE_ALIGN16_ANY4H);
+ }
+ return;
+
+ case ir_unop_any:
+ assert(!is_scalar);
+ bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ);
+ bld.IF(BRW_PREDICATE_ALIGN16_ANY4H);
+ return;
+
+ case ir_triop_csel: {
+ /* Expand the boolean condition into the flag register. */
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(bld.reg_null_d(), op[0]));
+
+ /* Select which boolean to return. */
+ dst_reg temp = bld.scalar_reg(op[1].type);
+ exec_predicate(BRW_PREDICATE_NORMAL,
+ bld.emit(BRW_OPCODE_SEL, temp, op[1], op[2]));
+ bld.IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ);
+ return;
+ }
+ default:
+ unreachable("not reached");
+ }
+ }
+
+ bld.IF(visit_result(ir->condition), src_reg(0), BRW_CONDITIONAL_NZ);
+ }
+
+ enum brw_predicate
+ emit_bool_to_cond_code(ir_rvalue *ir)
+ {
+ ir_expression *expr = ir->as_expression();
+ enum brw_predicate predicate = BRW_PREDICATE_NORMAL;
+
+ if (expr && expr->operation != ir_binop_ubo_load) {
+ bool is_scalar = true;
+ src_reg op[3];
+
+ assert(expr->get_num_operands() <= 3);
+ for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ is_scalar &= expr->operands[i]->type->is_scalar();
+ op[i] = bld.fix_condmod_negate(visit_result(expr->operands[i]));
+ }
+
+ switch (expr->operation) {
+ case ir_unop_logic_not:
+ exec_condmod(BRW_CONDITIONAL_Z,
+ bld.AND(bld.reg_null_d(), op[0], src_reg(1)));
+ break;
+
+ case ir_binop_logic_xor:
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+ bld.XOR(dst, op[0], op[1]);
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.AND(bld.reg_null_d(), src_reg(dst), src_reg(1)));
+ } else {
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.XOR(bld.reg_null_d(), op[0], op[1]));
+ }
+ break;
+
+ case ir_binop_logic_or:
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+ bld.OR(dst, op[0], op[1]);
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.AND(bld.reg_null_d(), src_reg(dst), src_reg(1)));
+ } else {
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.OR(bld.reg_null_d(), op[0], op[1]));
+ }
+ break;
+
+ case ir_binop_logic_and:
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+ bld.AND(dst, op[0], op[1]);
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.AND(bld.reg_null_d(), src_reg(dst), src_reg(1)));
+ } else {
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.AND(bld.reg_null_d(), op[0], op[1]));
+ }
+ break;
+
+ case ir_unop_f2b:
+ if (brw->gen >= 6)
+ bld.CMP(bld.reg_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ);
+ else
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(bld.reg_null_f(), op[0]));
+ break;
+
+ case ir_unop_i2b:
+ if (brw->gen >= 6)
+ bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ);
+ else
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(bld.reg_null_d(), op[0]));
+ break;
+
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_equal:
+ case ir_binop_nequal:
+ case ir_binop_all_equal:
+ case ir_binop_any_nequal:
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ resolve_bool_comparison(expr->operands[0], &op[0]);
+ resolve_bool_comparison(expr->operands[1], &op[1]);
+ }
+
+ bld.CMP(bld.reg_null_d(), op[0], op[1],
+ brw_conditional_for_comparison(expr->operation));
+
+ if (!is_scalar)
+ predicate = (expr->operation == ir_binop_all_equal ?
+ BRW_PREDICATE_ALIGN16_ALL4H :
+ BRW_PREDICATE_ALIGN16_ANY4H);
+ break;
+
+ case ir_unop_any:
+ assert(!is_scalar);
+ bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ);
+ predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ break;
+
+ case ir_triop_csel: {
+ /* Expand the boolean condition into the flag register. */
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(bld.reg_null_d(), op[0]));
+
+ /* Select which boolean to return. */
+ dst_reg temp = bld.natural_reg(op[1].type);
+ exec_predicate(BRW_PREDICATE_NORMAL,
+ bld.SEL(temp, op[1], op[2]));
+
+ /* Expand the result to a condition code. */
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(bld.reg_null_d(), src_reg(temp)));
+ break;
+ }
+
+ default:
+ unreachable("not reached");
+ }
+ } else {
+ exec_condmod(BRW_CONDITIONAL_NZ,
+ bld.AND(bld.reg_null_d(), visit_result(ir), src_reg(1)));
+ }
+
+ return predicate;
+ }
+
+ void
+ visit(ir_if *ir)
+ {
+ /* Don't point the annotation at the if statement, because then it plus
+ * the then and else blocks get printed.
+ */
+ bld.set_base_ir(ir->condition);
+
+ if (brw->gen == 6) {
+ emit_if_gen6(ir);
+ } else {
+ bld.IF(emit_bool_to_cond_code(ir->condition));
+ }
+
+ visit_instructions(&ir->then_instructions);
+
+ if (!ir->else_instructions.is_empty()) {
+ bld.set_base_ir(ir->condition);
+ bld.emit(BRW_OPCODE_ELSE);
+
+ visit_instructions(&ir->else_instructions);
+ }
+
+ bld.set_base_ir(ir->condition);
+ bld.emit(BRW_OPCODE_ENDIF);
+
+ self().try_replace_with_sel();
+ }
+
+ void
+ visit(ir_loop *ir)
+ {
+ if (brw->gen < 6)
+ self().no16("Can't support (non-uniform) control flow on SIMD16\n");
+
+ /* We don't want debugging output to print the whole body of the
+ * loop as the annotation.
+ */
+ bld.set_base_ir(NULL);
+ bld.emit(BRW_OPCODE_DO);
+
+ visit_instructions(&ir->body_instructions);
+
+ bld.set_base_ir(NULL);
+ bld.emit(BRW_OPCODE_WHILE);
+ }
+
+ void
+ visit(ir_loop_jump *ir)
+ {
+ switch (ir->mode) {
+ case ir_loop_jump::jump_break:
+ bld.emit(BRW_OPCODE_BREAK);
+ break;
+ case ir_loop_jump::jump_continue:
+ bld.emit(BRW_OPCODE_CONTINUE);
+ break;
+ }
+ }
+
+ src_reg
+ get_timestamp()
+ {
+ assert(brw->gen >= 7);
+ dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+
+ /* The caller wants the low 32 bits of the timestamp. Since it's running
+       * at the GPU clock rate of ~1.2GHz, it will roll over every ~3 seconds,
+ * which is plenty of time for our purposes. It is identical across the
+ * EUs, but since it's tracking GPU core speed it will increment at a
+ * varying rate as render P-states change.
+ *
+ * The caller could also check if render P-states have changed (or anything
+ * else that might disrupt timing) by reading back subregister 2 and
+ * checking if that field is != 0.
+ */
+ exec_all(bld.MOV(dst, brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_TIMESTAMP,
+ 0,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_4,
+ BRW_SWIZZLE_XYZW,
+ WRITEMASK_XYZW)));
+
+ return src_reg(dst);
+ }
+
+ void
+ emit_shader_time_begin()
+ {
+ bld.set_annotation("shader time start");
+ shader_start_time = get_timestamp();
+ }
+
+ void
+ emit_shader_time_end()
+ {
+ B ubld = bld.force_uncompressed();
+
+ bld.set_annotation("shader time end");
+ src_reg shader_end_time = get_timestamp();
+
+ /* Check that there weren't any timestamp reset events (assuming these
+ * were the only two timestamp reads that happened).
+ */
+ src_reg reset_end = component(shader_end_time, 3);
+
+ exec_condmod(BRW_CONDITIONAL_Z,
+ bld.AND(bld.reg_null_d(), reset_end, src_reg(1u)));
+ bld.IF(BRW_PREDICATE_NORMAL);
+
+ /* Take the current timestamp and get the delta. */
+ dst_reg diff = bld.scalar_reg(BRW_REGISTER_TYPE_UD);
+ ubld.ADD(diff, component(negate(shader_start_time), 0),
+ component(shader_end_time, 0));
+
+ /* If there were no instructions between the two timestamp gets, the diff
+       * is 2 cycles. Remove that overhead, so we can ignore it when trying
+       * to determine the time taken for single instructions.
+ */
+ ubld.ADD(diff, src_reg(diff), src_reg(-2u));
+
+ emit_shader_time_write(st_type, src_reg(diff));
+ emit_shader_time_write(st_type + ST_WRITTEN, src_reg(1u));
+ bld.emit(BRW_OPCODE_ELSE);
+ emit_shader_time_write(st_type + ST_RESET, src_reg(1u));
+ bld.emit(BRW_OPCODE_ENDIF);
+ }
+
+ void
+ emit_shader_time_write(int type, const src_reg &value)
+ {
+ B ubld = bld.force_uncompressed();
+ const int shader_time_index =
+ brw_get_shader_time_index(brw, shader_prog, prog,
+ (enum shader_time_shader_type)type);
+ const dst_reg payload = bld.natural_reg(BRW_REGISTER_TYPE_UD, 2);
+
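+      /* The first payload register holds the byte offset into the shader
+       * time buffer and the second holds the value to accumulate there.
+       */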
+ ubld.MOV(payload, src_reg(shader_time_index * SHADER_TIME_STRIDE));
+ ubld.MOV(offset(payload, 1), value);
+ ubld.emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(payload));
+ }
+
+ void
+ emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
+ const dst_reg &dst, const src_reg &addr,
+ const src_reg &src0, const src_reg &src1)
+ {
+ const dst_reg payload = half(bld.natural_reg(BRW_REGISTER_TYPE_UD, 4), 0);
+ src_reg srcs[4];
+ unsigned h, n = 0;
+
+ /* Initialize the message header if necessary. */
+ srcs[n] = self().emit_untyped_surface_header();
+ n += h = (srcs[n].file == BAD_FILE ? 0 : 1);
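+      /* h counts the header as a single register regardless of dispatch
+       * width, while every other payload source takes dispatch_width() / 8
+       * registers; mlen is computed accordingly below.
+       */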
+
+ /* Set the atomic operation offset. */
+ srcs[n] = src_reg(bld.natural_reg(BRW_REGISTER_TYPE_UD));
+ bld.MOV(dst_reg(srcs[n++]), addr);
+
+ /* Set the atomic operation arguments. */
+ if (src0.file != BAD_FILE) {
+ srcs[n] = src_reg(bld.natural_reg(BRW_REGISTER_TYPE_UD));
+ bld.MOV(dst_reg(srcs[n++]), src0);
+ }
+
+ if (src1.file != BAD_FILE) {
+ srcs[n] = src_reg(bld.natural_reg(BRW_REGISTER_TYPE_UD));
+ bld.MOV(dst_reg(srcs[n++]), src1);
+ }
+
+ /* Emit the instruction. Note that this maps to the normal
+ * SIMD8 untyped atomic message on Ivy Bridge when we are doing
+ * SIMD4x2, but that's OK because unused channels will be masked
+ * out.
+ */
+ bld.LOAD_PAYLOAD(payload, srcs, n);
+ bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_reg(payload),
+ src_reg(atomic_op), src_reg(surf_index))
+ ->mlen = (n - h) * bld.dispatch_width() / 8 + h;
+ }
+
+ void
+ emit_untyped_surface_read(unsigned surf_index, const dst_reg &dst,
+ const src_reg &addr)
+ {
+ const dst_reg payload = half(bld.natural_reg(BRW_REGISTER_TYPE_UD, 2), 0);
+ src_reg srcs[2];
+ unsigned h, n = 0;
+
+ /* Initialize the message header if necessary. */
+ srcs[n] = self().emit_untyped_surface_header();
+ n += h = (srcs[n].file == BAD_FILE ? 0 : 1);
+
+ /* Set the surface read offset. */
+ srcs[n] = src_reg(bld.natural_reg(BRW_REGISTER_TYPE_UD));
+ bld.MOV(dst_reg(srcs[n++]), addr);
+
+ /* Emit the instruction. Note that this maps to the normal
+       * SIMD8 untyped surface read message on Ivy Bridge when we are doing
+ * SIMD4x2, but that's OK because unused channels will be masked
+ * out.
+ */
+ bld.LOAD_PAYLOAD(payload, srcs, n);
+ bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
+ src_reg(payload), src_reg(surf_index))
+ ->mlen = h + bld.dispatch_width() / 8;
+ }
+
+ void
+ visit_atomic_counter_intrinsic(ir_call *ir)
+ {
+ ir_dereference *deref = static_cast<ir_dereference *>(
+ ir->actual_parameters.get_head());
+ ir_variable *location = deref->variable_referenced();
+ unsigned surf_index = (stage_prog_data->binding_table.abo_start +
+ location->data.binding);
+
+ /* Calculate the surface offset */
+ src_reg offset(bld.scalar_reg(BRW_REGISTER_TYPE_UD));
+ ir_dereference_array *deref_array = deref->as_dereference_array();
+
+ if (deref_array) {
+ src_reg tmp(bld.scalar_reg(BRW_REGISTER_TYPE_UD));
+ bld.MUL(dst_reg(tmp), visit_result(deref_array->array_index),
+ src_reg(ATOMIC_COUNTER_SIZE));
+ bld.ADD(dst_reg(offset), tmp, src_reg(location->data.atomic.offset));
+ } else {
+ offset = src_reg(location->data.atomic.offset);
+ }
+
+ /* Emit the appropriate machine instruction */
+ const char *callee = ir->callee->function_name();
+ dst_reg dst(visit_result(ir->return_deref));
+
+ if (!strcmp("__intrinsic_atomic_read", callee)) {
+ emit_untyped_surface_read(surf_index, dst, offset);
+
+ } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
+ emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
+ src_reg(), src_reg());
+
+ } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
+ emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
+ src_reg(), src_reg());
+ }
+ }
+
+ void
+ visit(ir_call *ir)
+ {
+ const char *callee = ir->callee->function_name();
+
+ if (!strcmp("__intrinsic_atomic_read", callee) ||
+ !strcmp("__intrinsic_atomic_increment", callee) ||
+ !strcmp("__intrinsic_atomic_predecrement", callee)) {
+ visit_atomic_counter_intrinsic(ir);
+ } else {
+ unreachable("Unsupported intrinsic.");
+ }
+ }
+
+ void
+ visit(ir_return *)
+ {
+ unreachable("FINISHME");
+ }
+
+ void
+ visit(ir_function_signature *)
+ {
+ unreachable("not reached");
+ }
+
+ void
+ visit(ir_function *ir)
+ {
+ /* Ignore function bodies other than main() -- we shouldn't see calls to
+ * them since they should all be inlined.
+ */
+ if (strcmp(ir->name, "main") == 0) {
+ const ir_function_signature *sig;
+ exec_list empty;
+
+ sig = ir->matching_signature(NULL, &empty, false);
+ assert(sig);
+
+ visit_instructions(&sig->body);
+ }
+ }
+
+ bool
+ try_emit_mad(ir_expression *ir)
+ {
+ /* 3-src instructions were introduced in gen6. */
+ if (brw->gen < 6)
+ return false;
+
+ /* MAD can only handle floating-point data. */
+ if (ir->type->base_type != GLSL_TYPE_FLOAT)
+ return false;
+
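+      /* Find a multiply among the operands of the ADD; the remaining
+       * operand becomes the additive argument of the MAD.
+       */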
+ ir_rvalue *nonmul = ir->operands[1];
+ ir_expression *mul = ir->operands[0]->as_expression();
+
+ if (!mul || mul->operation != ir_binop_mul) {
+ nonmul = ir->operands[0];
+ mul = ir->operands[1]->as_expression();
+
+ if (!mul || mul->operation != ir_binop_mul)
+ return false;
+ }
+
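+      /* Don't bother if any argument is a constant: 3-src instructions
+       * can't take immediate operands, so each constant would need an
+       * extra MOV anyway.
+       */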
+ if (nonmul->as_constant() ||
+ mul->operands[0]->as_constant() ||
+ mul->operands[1]->as_constant())
+ return false;
+
+ dst_reg result = self().temporary_reg(ir->type);
+ bld.MAD(result, bld.fix_3src_operand(visit_result(nonmul)),
+ bld.fix_3src_operand(visit_result(mul->operands[0])),
+ bld.fix_3src_operand(visit_result(mul->operands[1])));
+
+ this->result = src_reg(result);
+ return true;
+ }
+
+ /**
+ * Possibly returns an instruction that set up @param reg.
+ *
+ * Sometimes we want to take the result of some expression/variable
+ * dereference tree and rewrite the instruction generating the result
+ * of the tree. When processing the tree, we know that the
+ * instructions generated are all writing temporaries that are dead
+ * outside of this tree. So, if we have some instructions that write
+ * a temporary, we're free to point that temp write somewhere else.
+ *
+    * Note that this doesn't guarantee that the returned instruction wrote
+    * only reg -- it might be the size=4 destination of a texture instruction.
+ */
+ instruction *
+ get_instruction_generating_reg(instruction *start,
+ instruction *end,
+ const src_vector &reg)
+ {
+ if (end == start ||
+ (end->predicate && end->opcode != BRW_OPCODE_SEL) ||
+ reg.reladdr || reg.abs || reg.negate ||
+ !is_identity_swizzle(get_writemask(end), reg.swizzle) ||
+ !storage(reg).equals(src_reg(end->dst))) {
+ return NULL;
+ } else {
+ return end;
+ }
+ }
+
+ bool
+ try_emit_saturate(ir_expression *ir)
+ {
+ instruction *pre_inst = (instruction *)this->instructions.get_tail();
+ src_reg src = visit_result(ir->operands[0]);
+ instruction *last_inst = (instruction *)this->instructions.get_tail();
+
+ /* If the last instruction from our accept() generated our
+       * src, just set the saturate flag instead of emitting a separate MOV.
+ */
+ instruction *modify = get_instruction_generating_reg(
+ pre_inst, last_inst, src);
+
+ if (modify && modify->can_do_saturate() &&
+ get_writemask(modify) == (1u << ir->type->vector_elements) - 1) {
+ modify->saturate = true;
+ this->result = src;
+ return true;
+ }
+
+ return false;
+ }
+
+ bool
+ try_emit_b2f_of_compare(ir_expression *ir)
+ {
+ /* This optimization relies on CMP setting the destination to 0 when
+ * false. Early hardware only sets the least significant bit, and
+ * leaves the other bits undefined. So we can't use it.
+ */
+ if (brw->gen < 6)
+ return false;
+
+ ir_expression *const cmp = ir->operands[0]->as_expression();
+ if (cmp == NULL ||
+ !(cmp->operation == ir_binop_less ||
+ cmp->operation == ir_binop_greater ||
+ cmp->operation == ir_binop_lequal ||
+ cmp->operation == ir_binop_gequal ||
+ cmp->operation == ir_binop_equal ||
+ cmp->operation == ir_binop_nequal))
+ return false;
+
+ const src_reg src0 = visit_result(cmp->operands[0]);
+ const src_reg src1 = visit_result(cmp->operands[1]);
+
+ this->result = src_reg(self().temporary_reg(ir->type));
+ bld.CMP(dst_reg(this->result), src0, src1,
+ brw_conditional_for_comparison(cmp->operation));
+
+ /* If the comparison is false, this->result will just happen to be zero.
+ */
+ exec_predicate_inv(BRW_PREDICATE_NORMAL, true,
+ bld.emit(BRW_OPCODE_SEL, dst_reg(this->result),
+ this->result, src_reg(1.0f)));
+ return true;
+ }
+
+ /**
+ * Emit the correct dot-product instruction for the type of arguments
+ */
+ void
+ emit_dp(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+ unsigned elements)
+ {
+ static enum opcode dot_opcodes[] = {
+ BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
+ };
+
+ bld.emit(dot_opcodes[elements - 2], dst, src0, src1);
+ }
+
+ void
+ visit(ir_expression *ir)
+ {
+ unsigned int operand;
+ src_reg op[Elements(ir->operands)];
+ dst_reg temp;
+ bool is_scalar = true;
+
+ /* Deal with the real oddball stuff first */
+ switch (ir->operation) {
+ case ir_binop_add:
+ if (try_emit_mad(ir))
+ return;
+ break;
+ case ir_unop_b2f:
+ if (try_emit_b2f_of_compare(ir))
+ return;
+ break;
+ case ir_unop_saturate:
+ if (try_emit_saturate(ir))
+ return;
+ break;
+ case ir_unop_interpolate_at_centroid:
+ case ir_binop_interpolate_at_offset:
+ case ir_binop_interpolate_at_sample:
+ self().emit_interpolate_expression(ir);
+ return;
+ default:
+ break;
+ }
+
+ for (operand = 0; operand < ir->get_num_operands(); operand++) {
+ is_scalar &= ir->operands[operand]->type->is_scalar();
+ op[operand] = visit_result(ir->operands[operand]);
+
+ /* Matrix expression operands should have been broken down to vector
+ * operations already.
+ */
+ assert(!ir->operands[operand]->type->is_matrix());
+ }
+
+ /* Storage for our result. If our result goes into an assignment, it
+ * will just get copy-propagated out, so no worries.
+ */
+ dst_reg result_dst = self().temporary_reg(ir->type);
+ this->result = src_reg(result_dst);
+
+ switch (ir->operation) {
+ case ir_unop_logic_not:
+ if (ctx->Const.UniformBooleanTrue != 1) {
+ bld.NOT(result_dst, op[0]);
+ } else {
+ bld.XOR(result_dst, op[0], src_reg(1));
+ }
+ break;
+ case ir_unop_neg:
+ op[0].negate = !op[0].negate;
+ bld.MOV(result_dst, op[0]);
+ break;
+ case ir_unop_abs:
+ op[0].abs = true;
+ op[0].negate = false;
+ bld.MOV(result_dst, op[0]);
+ break;
+ case ir_unop_sign:
+ if (ir->type->is_float()) {
+ /* AND(val, 0x80000000) gives the sign bit.
+ *
+ * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
+ * zero.
+ */
+ bld.CMP(bld.reg_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ);
+
+ op[0].type = BRW_REGISTER_TYPE_UD;
+ result_dst.type = BRW_REGISTER_TYPE_UD;
+ bld.AND(result_dst, op[0], src_reg(0x80000000u));
+
+ exec_predicate(BRW_PREDICATE_NORMAL,
+ bld.OR(result_dst, src_reg(result_dst),
+ src_reg(0x3f800000u)));
+ this->result.type = BRW_REGISTER_TYPE_F;
+ } else {
+ /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
+ * -> non-negative val generates 0x00000000.
+ * Predicated OR sets 1 if val is positive.
+ */
+ bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G);
+
+ bld.ASR(result_dst, op[0], src_reg(31));
+
+ exec_predicate(BRW_PREDICATE_NORMAL,
+ bld.OR(result_dst, this->result, src_reg(1)));
+ }
+ break;
+ case ir_unop_rcp:
+ bld.emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
+ break;
+ case ir_unop_exp2:
+ bld.emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
+ break;
+ case ir_unop_log2:
+ bld.emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
+ break;
+ case ir_unop_exp:
+ case ir_unop_log:
+ unreachable("not reached: should be handled by ir_explog_to_explog2");
+
+ case ir_unop_sin:
+ case ir_unop_sin_reduced:
+ bld.emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
+ break;
+ case ir_unop_cos:
+ case ir_unop_cos_reduced:
+ bld.emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
+ break;
+ case ir_unop_dFdx:
+ bld.emit(FS_OPCODE_DDX, result_dst, op[0], src_reg(BRW_DERIVATIVE_BY_HINT));
+ break;
+ case ir_unop_dFdx_coarse:
+ bld.emit(FS_OPCODE_DDX, result_dst, op[0], src_reg(BRW_DERIVATIVE_COARSE));
+ break;
+ case ir_unop_dFdx_fine:
+ bld.emit(FS_OPCODE_DDX, result_dst, op[0], src_reg(BRW_DERIVATIVE_FINE));
+ break;
+ case ir_unop_dFdy:
+ bld.emit(FS_OPCODE_DDY, result_dst, op[0], src_reg(BRW_DERIVATIVE_BY_HINT));
+ break;
+ case ir_unop_dFdy_coarse:
+ bld.emit(FS_OPCODE_DDY, result_dst, op[0], src_reg(BRW_DERIVATIVE_COARSE));
+ break;
+ case ir_unop_dFdy_fine:
+ bld.emit(FS_OPCODE_DDY, result_dst, op[0], src_reg(BRW_DERIVATIVE_FINE));
+ break;
+ case ir_binop_add:
+ bld.ADD(result_dst, op[0], op[1]);
+ break;
+ case ir_binop_sub:
+ unreachable("not reached: should be handled by ir_sub_to_add_neg");
+ case ir_binop_mul:
+ if (brw->gen < 8 && ir->type->is_integer()) {
+ /* For integer multiplication, the MUL uses the low 16 bits
+ * of one of the operands (src0 on gen6, src1 on gen7). The
+          * MACH accumulates the contribution of the upper 16 bits
+ * of that operand.
+ */
+ if (ir->operands[0]->is_uint16_constant()) {
+ if (brw->gen < 7)
+ bld.MUL(result_dst, op[0], op[1]);
+ else
+ bld.MUL(result_dst, op[1], op[0]);
+ } else if (ir->operands[1]->is_uint16_constant()) {
+ if (brw->gen < 7)
+ bld.MUL(result_dst, op[1], op[0]);
+ else
+ bld.MUL(result_dst, op[0], op[1]);
+ } else {
+ if (brw->gen >= 7)
+ self().no16("SIMD16 explicit accumulator operands unsupported\n");
+
+ dst_reg acc(retype(brw_acc_reg(bld.dispatch_width()),
+ this->result.type));
+
+ bld.MUL(acc, op[0], op[1]);
+ bld.MACH(bld.reg_null_d(), op[0], op[1]);
+ bld.MOV(result_dst, src_reg(acc));
+ }
+ } else {
+ bld.MUL(result_dst, op[0], op[1]);
+ }
+ break;
+ case ir_binop_imul_high: {
+ if (brw->gen == 7)
+ self().no16("SIMD16 explicit accumulator operands unsupported\n");
+
+ dst_reg acc(retype(brw_acc_reg(bld.dispatch_width()),
+ this->result.type));
+
+ instruction *mul = bld.MUL(acc, op[0], op[1]);
+ bld.MACH(result_dst, op[0], op[1]);
+
+      /* Until Gen8, integer multiplies read 32 bits from one source and
+       * 16 bits from the other, relying on the MACH instruction to
+ * generate the high bits of the result.
+ *
+ * On Gen8, the multiply instruction does a full 32x32-bit multiply,
+ * but in order to do a 64x64-bit multiply we have to simulate the
+ * previous behavior and then use a MACH instruction.
+ *
+ * FINISHME: Don't use source modifiers on src1.
+ */
+ if (brw->gen >= 8) {
+ assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
+ mul->src[1].type == BRW_REGISTER_TYPE_UD);
+ if (mul->src[1].type == BRW_REGISTER_TYPE_D) {
+ mul->src[1].type = BRW_REGISTER_TYPE_W;
+ } else {
+ mul->src[1].type = BRW_REGISTER_TYPE_UW;
+ }
+ }
+ break;
+ }
+ case ir_binop_div:
+ /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
+ assert(ir->type->is_integer());
+ bld.emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_carry: {
+ if (brw->gen == 7)
+ self().no16("SIMD16 explicit accumulator operands unsupported\n");
+
+ src_reg acc(retype(brw_acc_reg(bld.dispatch_width()),
+ BRW_REGISTER_TYPE_UD));
+
+ bld.ADDC(bld.reg_null_ud(), op[0], op[1]);
+ bld.MOV(result_dst, acc);
+ break;
+ }
+ case ir_binop_borrow: {
+ if (brw->gen == 7)
+ self().no16("SIMD16 explicit accumulator operands unsupported\n");
+
+ src_reg acc(retype(brw_acc_reg(bld.dispatch_width()),
+ BRW_REGISTER_TYPE_UD));
+
+ bld.SUBB(bld.reg_null_ud(), op[0], op[1]);
+ bld.MOV(result_dst, acc);
+ break;
+ }
+ case ir_binop_mod:
+ /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
+ assert(ir->type->is_integer());
+ bld.emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_less:
+ case ir_binop_greater:
+ case ir_binop_lequal:
+ case ir_binop_gequal:
+ case ir_binop_equal:
+ case ir_binop_nequal:
+ case ir_binop_all_equal:
+ case ir_binop_any_nequal:
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ resolve_bool_comparison(ir->operands[0], &op[0]);
+ resolve_bool_comparison(ir->operands[1], &op[1]);
+ }
+
+ if (!is_scalar && ir->type->is_scalar()) {
+ bld.CMP(bld.reg_null_d(), op[0], op[1],
+ brw_conditional_for_comparison(ir->operation));
+ bld.MOV(result_dst, src_reg(0));
+ exec_predicate((ir->operation == ir_binop_all_equal ?
+ BRW_PREDICATE_ALIGN16_ALL4H :
+ BRW_PREDICATE_ALIGN16_ANY4H),
+ bld.MOV(result_dst,
+ src_reg(ctx->Const.UniformBooleanTrue)));
+ } else {
+ bld.CMP(result_dst, op[0], op[1],
+ brw_conditional_for_comparison(ir->operation));
+ }
+ break;
+ case ir_unop_any:
+ bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ);
+ bld.MOV(result_dst, src_reg(0));
+ exec_predicate(BRW_PREDICATE_ALIGN16_ANY4H,
+ bld.MOV(result_dst,
+ src_reg(ctx->Const.UniformBooleanTrue)));
+ break;
+ case ir_binop_logic_xor:
+ bld.XOR(result_dst, op[0], op[1]);
+ break;
+ case ir_binop_logic_or:
+ bld.OR(result_dst, op[0], op[1]);
+ break;
+ case ir_binop_logic_and:
+ bld.AND(result_dst, op[0], op[1]);
+ break;
+ case ir_binop_dot:
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
+ break;
+ case ir_unop_noise:
+ unreachable("not reached: should be handled by lower_noise");
+
+ case ir_quadop_vector:
+ unreachable("not reached: should be handled by lower_quadop_vector");
+
+ case ir_binop_vector_extract:
+ unreachable("not reached: should be handled by lower_vec_index_to_cond_assign()");
+
+ case ir_triop_vector_insert:
+ unreachable("not reached: should be handled by lower_vector_insert()");
+
+ case ir_binop_ldexp:
+ unreachable("not reached: should be handled by ldexp_to_arith()");
+
+ case ir_unop_sqrt:
+ bld.emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
+ break;
+ case ir_unop_rsq:
+ bld.emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
+ break;
+ case ir_unop_bitcast_i2f:
+ case ir_unop_bitcast_u2f:
+ op[0].type = BRW_REGISTER_TYPE_F;
+ this->result = op[0];
+ break;
+ case ir_unop_i2u:
+ case ir_unop_bitcast_f2u:
+ op[0].type = BRW_REGISTER_TYPE_UD;
+ this->result = op[0];
+ break;
+ case ir_unop_u2i:
+ case ir_unop_bitcast_f2i:
+ op[0].type = BRW_REGISTER_TYPE_D;
+ this->result = op[0];
+ break;
+ case ir_unop_i2f:
+ case ir_unop_u2f:
+ case ir_unop_f2i:
+ case ir_unop_f2u:
+ bld.MOV(result_dst, op[0]);
+ break;
+ case ir_unop_b2i:
+ bld.AND(result_dst, op[0], src_reg(1));
+ break;
+ case ir_unop_b2f:
+ if (ctx->Const.UniformBooleanTrue != 1) {
+ op[0].type = BRW_REGISTER_TYPE_UD;
+ result_dst.type = BRW_REGISTER_TYPE_UD;
+ bld.AND(result_dst, op[0], src_reg(0x3f800000u));
+ this->result.type = BRW_REGISTER_TYPE_F;
+ } else {
+ temp = self().temporary_reg(ir->operands[0]->type);
+ bld.AND(temp, op[0], src_reg(1));
+ bld.MOV(result_dst, src_reg(temp));
+ }
+ break;
+ case ir_unop_f2b:
+ bld.CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ);
+ break;
+ case ir_unop_i2b:
+ bld.CMP(result_dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ);
+ break;
+ case ir_unop_trunc:
+ bld.RNDZ(result_dst, op[0]);
+ break;
+ case ir_unop_ceil:
+ op[0].negate = !op[0].negate;
+ bld.RNDD(result_dst, op[0]);
+ this->result.negate = true;
+ break;
+ case ir_unop_floor:
+ bld.RNDD(result_dst, op[0]);
+ break;
+ case ir_unop_fract:
+ bld.FRC(result_dst, op[0]);
+ break;
+ case ir_unop_round_even:
+ bld.RNDE(result_dst, op[0]);
+ break;
+ case ir_binop_min:
+ case ir_binop_max:
+ bld.emit_minmax(ir->operation == ir_binop_min ?
+ BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
+ result_dst, op[0], op[1]);
+ break;
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_snorm_4x8:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_unorm_4x8:
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_snorm_4x8:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_unorm_4x8:
+ unreachable("not reached: should be handled by lower_packing_builtins");
+ case ir_unop_pack_half_2x16:
+ self().emit_pack_half_2x16(result_dst, op[0]);
+ break;
+ case ir_unop_unpack_half_2x16:
+ self().emit_unpack_half_2x16(result_dst, op[0]);
+ break;
+ case ir_unop_unpack_half_2x16_split_x:
+ bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result_dst, op[0]);
+ break;
+ case ir_unop_unpack_half_2x16_split_y:
+ bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result_dst, op[0]);
+ break;
+ case ir_binop_pow:
+ bld.emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
+ break;
+ case ir_unop_bitfield_reverse:
+ bld.BFREV(result_dst, op[0]);
+ break;
+ case ir_unop_bit_count:
+ bld.CBIT(result_dst, op[0]);
+ break;
+ case ir_unop_find_msb:
+ temp = retype(self().temporary_reg(ir->type),
+ BRW_REGISTER_TYPE_UD);
+ bld.FBH(temp, op[0]);
+
+ /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
+ * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
+ * subtract the result from 31 to convert the MSB count into an LSB count.
+ */
+
+ /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
+ bld.MOV(result_dst, src_reg(temp));
+ bld.CMP(bld.reg_null_d(), this->result, src_reg(-1), BRW_CONDITIONAL_NZ);
+ exec_predicate(BRW_PREDICATE_NORMAL,
+ bld.ADD(result_dst, negate(src_reg(temp)),
+ src_reg(31)));
+ break;
+ case ir_unop_find_lsb:
+ bld.FBL(result_dst, op[0]);
+ break;
+ case ir_unop_saturate:
+ bld.MOV(result_dst, op[0])
+ ->saturate = true;
+ break;
+ case ir_triop_bitfield_extract:
+ /* Note that the instruction's argument order is reversed from GLSL
+ * and the IR.
+ */
+ bld.BFE(result_dst, bld.fix_3src_operand(op[2]),
+ bld.fix_3src_operand(op[1]),
+ bld.fix_3src_operand(op[0]));
+ break;
+ case ir_binop_bfm:
+ bld.BFI1(result_dst, op[0], op[1]);
+ break;
+ case ir_triop_bfi:
+ bld.BFI2(result_dst, bld.fix_3src_operand(op[0]),
+ bld.fix_3src_operand(op[1]),
+ bld.fix_3src_operand(op[2]));
+ break;
+ case ir_quadop_bitfield_insert:
+ unreachable("not reached: should be handled by "
+ "lower_instructions::bitfield_insert_to_bfm_bfi");
+
+ case ir_unop_bit_not:
+ bld.NOT(result_dst, op[0]);
+ break;
+ case ir_binop_bit_and:
+ bld.AND(result_dst, op[0], op[1]);
+ break;
+ case ir_binop_bit_xor:
+ bld.XOR(result_dst, op[0], op[1]);
+ break;
+ case ir_binop_bit_or:
+ bld.OR(result_dst, op[0], op[1]);
+ break;
+ case ir_binop_lshift:
+ bld.SHL(result_dst, op[0], op[1]);
+ break;
+ case ir_binop_rshift:
+ if (ir->type->base_type == GLSL_TYPE_INT)
+ bld.ASR(result_dst, op[0], op[1]);
+ else
+ bld.SHR(result_dst, op[0], op[1]);
+ break;
+ case ir_binop_pack_half_2x16_split:
+ bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_ubo_load: {
+ /* This IR node takes a constant uniform block and a constant or
+ * variable byte offset within the block and loads a vector from that.
+ */
+ ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+ ir_constant *const_offset = ir->operands[1]->as_constant();
+ src_reg surf_index;
+
+ if (const_uniform_block) {
+ /* The block index is a constant, so just emit the binding table entry
+ * as an immediate.
+ */
+ surf_index = src_reg(stage_prog_data->binding_table.ubo_start +
+ const_uniform_block->value.u[0]);
+ } else {
+ /* The block index is not a constant. Evaluate the index expression
+ * per-channel and add the base UBO index; the generator will select
+ * a value from any live channel.
+ */
+ surf_index = src_reg(bld.scalar_reg(BRW_REGISTER_TYPE_UD));
+ exec_all(bld.ADD(dst_reg(surf_index), op[0],
+ src_reg(stage_prog_data->binding_table.ubo_start)));
+
+ /* Assume this may touch any UBO. It would be nice to provide
+ * a tighter bound, but the array information is already lowered away.
+ */
+ brw_mark_surface_used(stage_prog_data,
+ stage_prog_data->binding_table.ubo_start +
+ shader_prog->NumUniformBlocks - 1);
+ }
+
+ if (const_offset) {
+ self().emit_pull_constant_load(bld, result_dst, surf_index,
+ const_offset->value.u[0], NULL,
+ ir->type->vector_elements);
+ } else {
+ src_reg reladdr(bld.scalar_reg(BRW_REGISTER_TYPE_D));
+
+ /* Turn the byte offset into alloc_size units. */
+ bld.SHR(dst_reg(reladdr), op[1], src_reg(alloc_size == 4 ? 4 : 2));
+
+ self().emit_pull_constant_load(bld, result_dst, surf_index, 0,
+ &reladdr, ir->type->vector_elements);
+ }
+
+ if (ir->type->base_type == GLSL_TYPE_BOOL) {
+ for (unsigned i = 0; i < CEILING(ir->type->vector_elements,
+ alloc_size); i++) {
+ /* UBO bools are any nonzero value. We consider bools to be
+ * values with the low bit set to 1. Convert them using CMP.
+ */
+ bld.CMP(offset(result_dst, i), offset(result, i),
+ src_reg(0u), BRW_CONDITIONAL_NZ);
+ }
+ }
+ break;
+ }
+ case ir_triop_fma:
+ /* Note that the instruction's argument order is reversed from GLSL
+ * and the IR.
+ */
+ bld.MAD(result_dst, bld.fix_3src_operand(op[2]),
+ bld.fix_3src_operand(op[1]),
+ bld.fix_3src_operand(op[0]));
+ break;
+ case ir_triop_lrp:
+ bld.LRP(result_dst, op[0], op[1], op[2]);
+ break;
+ case ir_triop_csel:
+ bld.CMP(bld.reg_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ);
+ exec_predicate(BRW_PREDICATE_NORMAL,
+ bld.emit(BRW_OPCODE_SEL, result_dst, op[1], op[2]));
+ break;
+ case ir_unop_interpolate_at_centroid:
+ case ir_binop_interpolate_at_offset:
+ case ir_binop_interpolate_at_sample:
+ unreachable("already handled above");
+ break;
+ }
+ }
+
+ void
+ visit(ir_swizzle *ir)
+ {
+ const unsigned swz = compose_swizzle(
+ BRW_SWIZZLE4(ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w),
+ swizzle_for_size(ir->type->vector_elements));
+ dst_vector dst = dst_vector_n(self().temporary_reg(ir->type),
+ ir->type->vector_elements);
+ src_vector src = swizzle(src_vector_n(visit_result(ir->val), 4), swz);
+
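+      /* The vec4 backend can fold the swizzle directly into the source
+       * register, while the scalar backend has to shuffle the components
+       * with a MOV.
+       */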
+ if (reg_traits<src_reg>::allows_swizzle) {
+ this->result = storage(src);
+ } else {
+ bld.vector().MOV(dst, src);
+ this->result = src_reg(storage(dst));
+ }
+ }
+
+ unsigned
+ emit_assignment_writes(const dst_vector &l, const src_vector &r,
+ const glsl_type *type, enum brw_predicate predicate)
+ {
+ unsigned size = 0;
+
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL: {
+ typename B::vector_builder vbld = bld.vector();
+
+ for (int j = 0; j < type->matrix_columns; j++) {
+ dst_vector dst = retype(offset(l, size),
+ brw_type_for_base_type(type));
+ src_vector src = retype(offset(r, size),
+ brw_type_for_base_type(type));
+
+ exec_predicate(predicate,
+ vbld.MOV(resize(dst, type->vector_elements), src));
+
+ size += CEILING(type->vector_elements, alloc_size);
+ }
+ break;
+ }
+ case GLSL_TYPE_ARRAY:
+ for (unsigned i = 0; i < type->length; i++)
+ size += emit_assignment_writes(offset(l, size),
+ offset(r, size),
+ type->fields.array,
+ predicate);
+ break;
+
+ case GLSL_TYPE_STRUCT:
+ for (unsigned i = 0; i < type->length; i++)
+ size += emit_assignment_writes(offset(l, size),
+ offset(r, size),
+ type->fields.structure[i].type,
+ predicate);
+ break;
+
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ break;
+
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_INTERFACE:
+ unreachable("not reached");
+ }
+
+ return size;
+ }
+
+ /* If the RHS processing resulted in an instruction generating a
+ * temporary value, and it would be easy to rewrite the instruction to
+ * generate its result right into the LHS instead, do so. This ends
+ * up reliably removing instructions where it can be tricky to do so
+ * later without real UD chain information.
+ */
+ bool
+ try_rewrite_rhs_to_dst(ir_assignment *ir,
+ const dst_vector &dst, const src_vector &src,
+ instruction *pre_rhs_inst,
+ instruction *last_rhs_inst)
+ {
+ /* Only attempt if we're doing a direct assignment. */
+ if (ir->condition ||
+ !(ir->lhs->type->is_scalar() || ir->lhs->type->is_vector()))
+ return false;
+
+ /* Make sure the last instruction generated our source reg. */
+ instruction *modify = get_instruction_generating_reg(
+ pre_rhs_inst, last_rhs_inst, src);
+ if (!modify)
+ return false;
+
+ /* If last_rhs_inst wrote a different number of components than our LHS,
+ * we can't safely rewrite it.
+ */
+ if ((dst.writemask & ~get_writemask(modify)) ||
+ ((~dst.writemask & get_writemask(modify)) &&
+ !reg_traits<dst_reg>::allows_writemask))
+ return false;
+
+ /* Success! Rewrite the instruction. */
+ modify->dst = storage(dst);
+ return true;
+ }
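+ /* E.g. for "x = a + b" the RHS is typically emitted as
+  *
+  *    ADD tmp, a, b
+  *
+  * and would otherwise be followed by a copy into x.  Retargeting the
+  * ADD destination to x saves that trailing MOV without needing
+  * def-use chains later on.
+  */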
+
+ void
+ visit(ir_assignment *ir)
+ {
+ const unsigned mask = (ir->lhs->type->is_vector() ? ir->write_mask :
+ ir->lhs->type->is_scalar() ? 0x1 : 0xf);
+ dst_vector l = writemask(dst_vector_n(visit_result(ir->lhs), 4), mask);
+ instruction *pre_rhs_inst = (instruction *)this->instructions.get_tail();
+ src_vector r = swizzle(src_vector_n(visit_result(ir->rhs), 4),
+ from_glsl_swizzle(mask, SWIZZLE_XYZW));
+ instruction *last_rhs_inst = (instruction *)this->instructions.get_tail();
+ enum brw_predicate predicate = BRW_PREDICATE_NONE;
+
+ if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
+ return;
+
+ if (ir->condition)
+ predicate = emit_bool_to_cond_code(ir->condition);
+
+ emit_assignment_writes(l, r, ir->lhs->type, predicate);
+ }
+
+ /* Sample from the MCS surface attached to this multisample texture. */
+ src_reg
+ emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler)
+ {
+ typename B::vector_builder vbld = bld.vector();
+ const unsigned reg_width = bld.dispatch_width() / 8;
+ const unsigned length = ir->coordinate->type->vector_elements;
+ const unsigned coord_mask = (1 << length) - 1;
+ const unsigned zero_mask = ((1 << alloc_size) - 1) & ~coord_mask;
+ dst_vector payload = vbld.natural_reg(brw_type_for_base_type(
+ ir->coordinate->type));
+ dst_vector dst = vbld.natural_reg(BRW_REGISTER_TYPE_UD);
+
+ vbld.MOV(writemask(payload, coord_mask), src_vector_n(coordinate, length));
+ vbld.MOV(writemask(payload, zero_mask), src_reg(0));
+
+ instruction *inst = bld.emit(SHADER_OPCODE_TXF_MCS, storage(dst),
+ src_reg(storage(payload)), sampler);
+ inst->base_mrf = -1;
+ inst->mlen = CEILING(length, alloc_size) * reg_width;
+ /* We only care about one component of the response, but the sampler
+  * always writes 4.
+  */
+ inst->regs_written = CEILING(4, alloc_size) * reg_width;
+ return src_reg(storage(dst));
+ }
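+ /* Payload sketch for a texelFetch() from a 2D MSAA texture with the
+  * vec4 backend (alloc_size == 4): the coordinate fills .xy of the
+  * payload, .zw are zeroed, and the TXF_MCS message returns a
+  * 4-channel response of which only .x is consumed by the following
+  * TXF_CMS.
+  */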
+
+ /**
+ * Apply workarounds for Gen6 gather with UINT/SINT
+ */
+ void
+ emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
+ {
+ if (!wa)
+ return;
+
+ typename B::vector_builder vbld = bld.vector();
+ const unsigned width = (wa & WA_8BIT) ? 8 : 16;
+ dst_vector vdst = dst_vector_n(dst, 4);
+ dst_vector vdst_f = retype(vdst, BRW_REGISTER_TYPE_F);
+
+ /* Convert from UNORM to UINT */
+ vbld.MUL(vdst_f, src_vector(vdst_f), src_reg((float)((1 << width) - 1)));
+ vbld.MOV(vdst, src_vector(vdst_f));
+
+ if (wa & WA_SIGN) {
+ /* Reinterpret the UINT value as a signed INT value by shifting the
+ * sign bit into place, then shifting back preserving sign.
+ */
+ vbld.SHL(vdst, src_vector(vdst), src_reg(32 - width));
+ vbld.ASR(vdst, src_vector(vdst), src_reg(32 - width));
+ }
+ }
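+ /* Worked example for an 8-bit signed format (wa == WA_8BIT | WA_SIGN):
+  * the gathered byte 0xff comes back as UNORM 1.0; the MUL by 255 and
+  * float-to-int MOV recover the raw value 255, and SHL/ASR by 24
+  * reinterpret it as the signed value -1.
+  */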
+
+ /**
+ * Swizzle the result of a texture instruction. This is necessary for
+ * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow
+ * comparisons.
+ */
+ void
+ swizzle_result(ir_texture *ir, const src_reg &orig_val, uint32_t sampler)
+ {
+ const unsigned swz = self().sampler_prog_key()->swizzles[sampler];
+
+ if (ir->op == ir_query_levels) {
+ /* # levels is in .w */
+ this->result = component(src_vector_n(orig_val, 4), 3);
+ return;
+ }
+
+ this->result = resize(orig_val, ir->type->vector_elements);
+
+ /* txs and lod don't actually sample the texture, and tg4 selects its
+  * channel via gather_channel(), so swizzling the result makes no
+  * sense.
+  */
+ if (ir->op == ir_txs || ir->op == ir_lod || ir->op == ir_tg4)
+ return;
+
+ if (ir->type == glsl_type::float_type) {
+ /* Ignore DEPTH_TEXTURE_MODE swizzling. */
+ assert(ir->sampler->type->sampler_shadow);
+
+ } else if (swz != SWIZZLE_NOOP) {
+ typename B::vector_builder vbld = bld.vector();
+ dst_vector dst = vbld.natural_reg(orig_val.type);
+ unsigned zero_mask = 0, one_mask = 0, copy_mask = 0;
+
+ for (int i = 0; i < 4; i++) {
+ switch (GET_SWZ(swz, i)) {
+ case SWIZZLE_ZERO:
+ zero_mask |= (1 << i);
+ break;
+ case SWIZZLE_ONE:
+ one_mask |= (1 << i);
+ break;
+ default:
+ copy_mask |= (1 << i);
+ break;
+ }
+ }
+
+ if (copy_mask)
+ vbld.MOV(writemask(dst, copy_mask),
+ swizzle(src_vector_n(orig_val, 4),
+ from_glsl_swizzle(0xf, swz)));
+
+ if (zero_mask)
+ vbld.MOV(writemask(dst, zero_mask), src_reg(0.0f));
+
+ if (one_mask)
+ vbld.MOV(writemask(dst, one_mask), src_reg(1.0f));
+
+ this->result = src_reg(storage(dst));
+ }
+ }
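+ /* E.g. an EXT_texture_swizzle of (R, R, R, ONE) yields copy_mask 0x7
+  * (all three channels reading red) and one_mask 0x8, so the result is
+  * assembled with a single swizzled MOV plus an immediate MOV of 1.0f.
+  */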
+
+ /**
+ * Set up the gather channel based on the swizzle, for gather4.
+ */
+ uint32_t
+ gather_channel(ir_texture *ir, uint32_t sampler)
+ {
+ const struct brw_sampler_prog_key_data *tex = self().sampler_prog_key();
+ ir_constant *chan = ir->lod_info.component->as_constant();
+ const unsigned swiz = GET_SWZ(tex->swizzles[sampler], chan->value.i[0]);
+
+ switch (swiz) {
+ case SWIZZLE_X: return 0;
+ case SWIZZLE_Y:
+ /* gather4 sampler is broken for green channel on RG32F --
+ * we must ask for blue instead.
+ */
+ return (tex->gather_channel_quirk_mask & (1 << sampler) ? 2 : 1);
+ case SWIZZLE_Z: return 2;
+ case SWIZZLE_W: return 3;
+ default:
+ unreachable("Not reached"); /* zero, one swizzles handled already */
+ }
+ }
+
+ src_reg
+ rescale_texcoord(ir_texture *ir, src_reg coordinate,
+ bool is_rect, uint32_t sampler, int texunit)
+ {
+ typename B::vector_builder vbld = bld.vector();
+ const struct brw_sampler_prog_key_data *tex = self().sampler_prog_key();
+ const unsigned clamp_mask =
+ ((tex->gl_clamp_mask[0] & (1 << sampler) ? 1 : 0) << 0) |
+ ((tex->gl_clamp_mask[1] & (1 << sampler) ? 1 : 0) << 1) |
+ ((tex->gl_clamp_mask[2] & (1 << sampler) ? 1 : 0) << 2);
+ src_vector scale;
+
+ /* The 965 requires the EU to do the normalization of GL rectangle
+ * texture coordinates. We use the program parameter state
+ * tracking to get the scaling factor.
+ */
+ if (is_rect && (brw->gen < 6 || clamp_mask)) {
+ struct gl_program_parameter_list *params = prog->Parameters;
+ int tokens[STATE_LENGTH] = {
+ STATE_INTERNAL,
+ STATE_TEXRECT_SCALE,
+ texunit,
+ 0,
+ 0
+ };
+
+ self().no16("rectangle scale uniform setup not supported on SIMD16\n");
+ if (bld.dispatch_width() == 16) {
+ return coordinate;
+ }
+
+ GLuint index = _mesa_add_state_reference(params,
+ (gl_state_index *)tokens);
+ /* Try to find existing copies of the texrect scale uniforms. */
+ for (unsigned i = 0; i < uniforms; i++) {
+ if (stage_prog_data->param[alloc_size * i] ==
+ &prog->Parameters->ParameterValues[index][0]) {
+ scale = src_vector_n(src_reg(UNIFORM, i), 2);
+ break;
+ }
+ }
+
+ /* If we didn't already set them up, do so now. */
+ if (storage(scale).file == BAD_FILE) {
+ scale = src_vector_n(src_reg(UNIFORM, uniforms), 2);
+ stage_prog_data->param[alloc_size * uniforms] =
+ &prog->Parameters->ParameterValues[index][0];
+ stage_prog_data->param[alloc_size * uniforms + 1] =
+ &prog->Parameters->ParameterValues[index][1];
+ uniform_vector_size[uniforms] = 2;
+ uniforms += CEILING(2, alloc_size);
+ }
+ }
+
+ if (brw->gen >= 6 && is_rect) {
+ /* On gen6+, the sampler handles the rectangle coordinates
+ * natively, without needing rescaling. But that means we have
+ * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
+ * not [0, 1] like the default case below.
+ */
+ dst_vector dst = dst_vector_n(coordinate, 4);
+ exec_condmod(BRW_CONDITIONAL_G,
+ vbld.emit(BRW_OPCODE_SEL, writemask(dst, clamp_mask),
+ src_vector(dst), src_reg(0.0f)));
+
+ /* Our parameter comes in as 1.0/width or 1.0/height,
+ * because that's what people normally want for doing
+ * texture rectangle handling. We need width or height
+ * for clamping, but we don't care enough to make a new
+ * parameter type, so just invert back.
+ */
+ dst_vector limit = vbld.natural_reg(BRW_REGISTER_TYPE_F);
+ vbld.MOV(limit, scale);
+ vbld.emit(SHADER_OPCODE_RCP, writemask(limit, clamp_mask),
+ src_vector(limit));
+
+ exec_condmod(BRW_CONDITIONAL_L,
+ vbld.emit(BRW_OPCODE_SEL, writemask(dst, clamp_mask),
+ src_vector(dst), src_vector(limit)));
+ } else {
+ if (is_rect) {
+ dst_vector dst = vbld.natural_reg(brw_type_for_base_type(ir->type));
+ src_vector src = src_vector_n(coordinate, 4);
+ coordinate = src_reg(storage(dst));
+ vbld.MUL(writemask(dst, WRITEMASK_XY), src, scale);
+ }
+
+ if (ir->coordinate) {
+ dst_vector dst = dst_vector_n(coordinate, 4);
+ exec_saturate(true,
+ vbld.MOV(writemask(dst, clamp_mask), src_vector(dst)));
+ }
+ }
+
+ return coordinate;
+ }
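+ /* E.g. for a 256x128 rectangle texture with GL_CLAMP on both axes the
+  * TEXRECT_SCALE parameter holds (1/256, 1/128): pre-gen6 the
+  * coordinate is multiplied by it to normalize, while on gen6+ the
+  * SEL/RCP/SEL sequence above clamps it to [0, 256] x [0, 128]
+  * instead.
+  */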
+
+ void
+ visit(ir_texture *ir)
+ {
+ typename B::vector_builder vbld = bld.vector();
+ const struct brw_sampler_prog_key_data *tex = self().sampler_prog_key();
+ uint32_t sampler =
+ _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
+ ir_rvalue *nonconst_sampler_index =
+ _mesa_get_sampler_array_nonconst_index(ir->sampler);
+
+ /* Handle non-constant sampler array indexing */
+ src_reg sampler_reg;
+ if (nonconst_sampler_index) {
+ /* The highest sampler which may be used by this operation is
+ * the last element of the array. Mark it here, because the generator
+ * doesn't have enough information to determine the bound.
+ */
+ uint32_t array_size = ir->sampler->as_dereference_array()
+ ->array->type->array_size();
+
+ uint32_t max_used = sampler + array_size - 1;
+ if (ir->op == ir_tg4 && brw->gen < 8) {
+ max_used += stage_prog_data->binding_table.gather_texture_start;
+ } else {
+ max_used += stage_prog_data->binding_table.texture_start;
+ }
+
+ brw_mark_surface_used(stage_prog_data, max_used);
+
+ /* Emit code to evaluate the actual indexing expression */
+ dst_reg tmp = bld.scalar_reg(BRW_REGISTER_TYPE_UD);
+ exec_all(bld.ADD(tmp, visit_result(nonconst_sampler_index),
+ src_reg(sampler)));
+ sampler_reg = src_reg(tmp);
+ } else {
+ /* Single sampler, or constant array index; the indexing expression
+ * is just an immediate.
+ */
+ sampler_reg = src_reg(sampler);
+ }
+
+ /* FINISHME: We're failing to recompile our programs when the sampler is
+ * updated. This only matters for the texture rectangle scale parameters
+ * (pre-gen6, or gen6+ with GL_CLAMP).
+ */
+ int texunit = prog->SamplerUnits[sampler];
+
+ if (ir->op == ir_tg4) {
+ /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
+ * emitting anything other than setting up the constant result.
+ */
+ ir_constant *chan = ir->lod_info.component->as_constant();
+ int swiz = GET_SWZ(tex->swizzles[sampler], chan->value.i[0]);
+ if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
+ dst_vector res = vbld.natural_reg(BRW_REGISTER_TYPE_F);
+ vbld.MOV(res, src_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f));
+ this->result = src_reg(storage(res));
+ return;
+ }
+ }
+
+ /* Should be lowered by do_lower_texture_projection */
+ assert(!ir->projector);
+
+ /* Should be lowered */
+ assert(!ir->offset || !ir->offset->type->is_array());
+
+ /* Generate code to compute all the subexpression trees. This has to be
+ * done before loading any values into MRFs for the sampler message since
+ * generating these values may involve SEND messages that need the MRFs.
+ */
+ src_reg coordinate;
+ if (ir->coordinate)
+ coordinate = rescale_texcoord(ir, visit_result(ir->coordinate),
+ ir->sampler->type->sampler_dimensionality ==
+ GLSL_SAMPLER_DIM_RECT,
+ sampler, texunit);
+
+ src_reg shadow_comparitor;
+ if (ir->shadow_comparitor)
+ shadow_comparitor = visit_result(ir->shadow_comparitor);
+
+ src_reg offset_val;
+ if (ir->offset && !ir->offset->as_constant())
+ offset_val = visit_result(ir->offset);
+
+ src_reg lod(0.0f), lod2, sample_index, mcs;
+ switch (ir->op) {
+ case ir_tex:
+ case ir_lod:
+ case ir_tg4:
+ case ir_query_levels:
+ break;
+ case ir_txb:
+ lod = visit_result(ir->lod_info.bias);
+ break;
+ case ir_txd:
+ lod = visit_result(ir->lod_info.grad.dPdx);
+ lod2 = visit_result(ir->lod_info.grad.dPdy);
+ break;
+ case ir_txf:
+ case ir_txl:
+ case ir_txs:
+ lod = visit_result(ir->lod_info.lod);
+ break;
+ case ir_txf_ms:
+ sample_index = visit_result(ir->lod_info.sample_index);
+
+ if (brw->gen >= 7 && tex->compressed_multisample_layout_mask & (1<<sampler))
+ mcs = emit_mcs_fetch(ir, coordinate, sampler_reg);
+ else
+ mcs = src_reg(0u);
+ break;
+ default:
+ unreachable("Unrecognized texture opcode");
+ }
+
+ /* Writemasking doesn't eliminate channels on SIMD8 texture
+ * samples, so don't worry about them.
+ */
+ dst_reg dst = storage(vbld.natural_reg(brw_type_for_base_type(ir->type)));
+ instruction *inst = self().emit_texture(
+ ir, dst, coordinate, shadow_comparitor,
+ lod, lod2, offset_val, sample_index, mcs, sampler_reg);
+
+ if (ir->offset != NULL && ir->op != ir_txf)
+ inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant());
+
+ if (ir->op == ir_tg4)
+ inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17
+
+ if (ir->shadow_comparitor)
+ inst->shadow_compare = true;
+
+ /* fixup #layers for cube map arrays */
+ if (ir->op == ir_txs) {
+ glsl_type const *type = ir->sampler->type;
+ if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
+ type->sampler_array) {
+ const unsigned components = ir->type->vector_elements;
+ dst_vector vdst = dst_vector_n(dst, components);
+ dst_reg depth = bld.scalar_reg(BRW_REGISTER_TYPE_D);
+ src_reg payload[4];
+
+ bld.emit_math(SHADER_OPCODE_INT_QUOTIENT, depth,
+ component(vdst, 2), src_reg(6));
+
+ for (unsigned i = 0; i < components; ++i)
+ payload[i] = (i == 2 ? src_reg(depth) : component(vdst, i));
+
+ vbld.LOAD_VECTOR(vdst, payload);
+ }
+ }
+
+ if (brw->gen == 6 && ir->op == ir_tg4)
+ emit_gen6_gather_wa(tex->gen6_gather_wa[sampler], dst);
+
+ swizzle_result(ir, src_reg(dst), sampler);
+ }
+
+ struct hash_table *variable_ht;
+
+ typename B::src_reg shader_start_time;
+ B bld;
+
+ const bool uses_kill;
+
+public:
+ int
+ type_size(const struct glsl_type *type)
+ {
+ unsigned int size, i;
+
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ if (type->is_matrix()) {
+ return type->matrix_columns * type_size(type->column_type());
+ } else {
+ return CEILING(type->components(), alloc_size);
+ }
+ case GLSL_TYPE_ARRAY:
+ return type_size(type->fields.array) * type->length;
+ case GLSL_TYPE_STRUCT:
+ size = 0;
+ for (i = 0; i < type->length; i++) {
+ size += type_size(type->fields.structure[i].type);
+ }
+ return size;
+ case GLSL_TYPE_SAMPLER:
+ /* Samplers take up no register space, since they're baked in at
+ * link time.
+ */
+ return 0;
+ case GLSL_TYPE_ATOMIC_UINT:
+ return 0;
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_INTERFACE:
+ unreachable("not reached");
+ }
+
+ return 0;
+ }
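+ /* For example, with the vec4 backend (alloc_size == 4) a vec3 and a
+  * mat3 take 1 and 3 units respectively, while the scalar FS backend
+  * (alloc_size == 1) yields 3 and 9 for the same types.
+  */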
+
+ /**
+ * Returns how many MRFs an opcode will write over.
+ *
+ * Note that this is not the 0 or 1 implied writes in an actual gen
+ * instruction -- the generate_* functions generate additional MOVs
+ * for setup.
+ */
+ int
+ implied_mrf_writes(instruction *inst)
+ {
+ if (inst->mlen == 0 || inst->base_mrf == -1)
+ return 0;
+
+ switch (inst->opcode) {
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
+ return 1 * bld.dispatch_width() / 8;
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
+ case SHADER_OPCODE_POW:
+ return 2 * bld.dispatch_width() / 8;
+ case VS_OPCODE_URB_WRITE:
+ return 1;
+ case VS_OPCODE_PULL_CONSTANT_LOAD:
+ return 2;
+ case SHADER_OPCODE_GEN4_SCRATCH_READ:
+ return inst->mlen;
+ case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
+ return inst->mlen;
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+ return 1;
+ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
+ return inst->mlen;
+ case GS_OPCODE_URB_WRITE:
+ case GS_OPCODE_URB_WRITE_ALLOCATE:
+ case GS_OPCODE_THREAD_END:
+ return 0;
+ case GS_OPCODE_FF_SYNC:
+ return 1;
+ case SHADER_OPCODE_SHADER_TIME_ADD:
+ return 0;
+ case SHADER_OPCODE_TEX:
+ case SHADER_OPCODE_TXL:
+ case SHADER_OPCODE_TXD:
+ case SHADER_OPCODE_TXF:
+ case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_MCS:
+ case SHADER_OPCODE_TXS:
+ case SHADER_OPCODE_TG4:
+ case SHADER_OPCODE_TG4_OFFSET:
+ case SHADER_OPCODE_LOD:
+ case FS_OPCODE_TXB:
+ return inst->header_present ? 1 : 0;
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case FS_OPCODE_INTERPOLATE_AT_CENTROID:
+ case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
+ case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
+ case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+ return 0;
+ case FS_OPCODE_FB_WRITE:
+ return 2;
+ default:
+ unreachable("not reached");
+ }
+ }
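+ /* E.g. a SIMD16 POW in the FS backend implies 2 * 16 / 8 = 4 MRF
+  * writes for its two operands, while the same opcode in the vec4
+  * backend would imply 2, assuming the vec4 builder reports a dispatch
+  * width of 8.
+  */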
+
+ static const unsigned alloc_size = reg_traits<src_reg>::alloc_size;
+
+ brw::simple_allocator alloc;
+
+ /* Result of the most recent visit() call. */
+ typename B::src_reg result;
+
+ const shader_time_shader_type st_type;
+};
+
+} /* namespace brw */
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index b37da4ead62..1eaef45bf68 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -284,10 +284,10 @@ get_written_and_reset(struct brw_context *brw, int i,
/* Find where we recorded written and reset. */
int wi, ri;
- for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
+ for (wi = i; brw->shader_time.types[wi] != type + ST_WRITTEN; wi++)
;
- for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
+ for (ri = i; brw->shader_time.types[ri] != type + ST_RESET; ri++)
;
*written = brw->shader_time.cumulative[wi];
@@ -328,27 +328,18 @@ brw_report_shader_time(struct brw_context *brw)
sorted[i] = &scaled[i];
- switch (type) {
- case ST_VS_WRITTEN:
- case ST_VS_RESET:
- case ST_GS_WRITTEN:
- case ST_GS_RESET:
- case ST_FS8_WRITTEN:
- case ST_FS8_RESET:
- case ST_FS16_WRITTEN:
- case ST_FS16_RESET:
+ switch (type % ST_NUM_ENTRIES) {
+ case ST_BASE:
+ get_written_and_reset(brw, i, &written, &reset);
+ break;
+
+ case ST_WRITTEN:
+ case ST_RESET:
/* We'll handle these along with the time. */
scaled[i] = 0;
continue;
- case ST_VS:
- case ST_GS:
- case ST_FS8:
- case ST_FS16:
- get_written_and_reset(brw, i, &written, &reset);
- break;
-
- default:
+ case ST_SUM:
/* I sometimes want to print things that aren't the 3 shader times.
* Just print the sum in that case.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index a8650c3454b..9cd391471da 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -24,6 +24,8 @@
#ifndef BRW_PROGRAM_H
#define BRW_PROGRAM_H
+#include "main/mtypes.h"
+
enum gen6_gather_sampler_wa {
WA_SIGN = 1, /* whether we need to sign extend */
WA_8BIT = 2, /* if we have an 8bit format needing wa */
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 19af0ae09fc..a27a3bad396 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -80,6 +80,7 @@ struct brw_context;
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
+#define BRW_SWIZZLE_XXYY BRW_SWIZZLE4(0,0,1,1)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
#define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3)
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index db94e527ca7..4017b14ddc0 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -395,7 +395,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
class instruction_scheduler {
public:
- instruction_scheduler(backend_visitor *v, int grf_count,
+ instruction_scheduler(base_visitor *v, int grf_count,
instruction_scheduler_mode mode)
{
this->bv = v;
@@ -451,7 +451,7 @@ public:
int grf_count;
int time;
exec_list instructions;
- backend_visitor *bv;
+ base_visitor *bv;
instruction_scheduler_mode mode;
@@ -1081,12 +1081,14 @@ vec4_instruction_scheduler::calculate_deps()
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(last_mrf_write[inst->base_mrf + i], n);
+ if (inst->base_mrf != -1) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
+ }
}
if (inst->reads_flag()) {
@@ -1116,7 +1118,7 @@ vec4_instruction_scheduler::calculate_deps()
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
@@ -1171,12 +1173,14 @@ vec4_instruction_scheduler::calculate_deps()
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ if (inst->base_mrf != -1) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ }
}
if (inst->reads_flag()) {
@@ -1203,7 +1207,7 @@ vec4_instruction_scheduler::calculate_deps()
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 05f6fe78523..18cee8722af 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -544,23 +544,6 @@ brw_instruction_name(enum opcode op)
}
}
-backend_visitor::backend_visitor(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- gl_shader_stage stage)
- : brw(brw),
- ctx(&brw->ctx),
- shader(shader_prog ?
- (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
- shader_prog(shader_prog),
- prog(prog),
- stage_prog_data(stage_prog_data),
- cfg(NULL),
- stage(stage)
-{
-}
-
bool
backend_reg::is_zero() const
{
@@ -829,104 +812,3 @@ backend_instruction::remove(bblock_t *block)
exec_node::remove();
}
-
-void
-backend_visitor::dump_instructions()
-{
- dump_instructions(NULL);
-}
-
-void
-backend_visitor::dump_instructions(const char *name)
-{
- FILE *file = stderr;
- if (name && geteuid() != 0) {
- file = fopen(name, "w");
- if (!file)
- file = stderr;
- }
-
- int ip = 0;
- foreach_block_and_inst(block, backend_instruction, inst, cfg) {
- if (!name)
- fprintf(stderr, "%d: ", ip++);
- dump_instruction(inst, file);
- }
-
- if (file != stderr) {
- fclose(file);
- }
-}
-
-void
-backend_visitor::calculate_cfg()
-{
- if (this->cfg)
- return;
- cfg = new(mem_ctx) cfg_t(&this->instructions);
-}
-
-void
-backend_visitor::invalidate_cfg()
-{
- ralloc_free(this->cfg);
- this->cfg = NULL;
-}
-
-/**
- * Sets up the starting offsets for the groups of binding table entries
- * commong to all pipeline stages.
- *
- * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
- * unused but also make sure that addition of small offsets to them will
- * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
- */
-void
-backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
-{
- int num_textures = _mesa_fls(prog->SamplersUsed);
-
- stage_prog_data->binding_table.texture_start = next_binding_table_offset;
- next_binding_table_offset += num_textures;
-
- if (shader) {
- stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
- next_binding_table_offset += shader->base.NumUniformBlocks;
- } else {
- stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
- }
-
- if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
- stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
- next_binding_table_offset++;
- } else {
- stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
- }
-
- if (prog->UsesGather) {
- if (brw->gen >= 8) {
- stage_prog_data->binding_table.gather_texture_start =
- stage_prog_data->binding_table.texture_start;
- } else {
- stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
- next_binding_table_offset += num_textures;
- }
- } else {
- stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
- }
-
- if (shader_prog && shader_prog->NumAtomicBuffers) {
- stage_prog_data->binding_table.abo_start = next_binding_table_offset;
- next_binding_table_offset += shader_prog->NumAtomicBuffers;
- } else {
- stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
- }
-
- /* This may or may not be used depending on how the compile goes. */
- stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
- next_binding_table_offset++;
-
- assert(next_binding_table_offset <= BRW_MAX_SURFACES);
-
- /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
-}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 0f927acfc4a..0c75b4c3ee2 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -21,18 +21,14 @@
* IN THE SOFTWARE.
*/
+#pragma once
+
#include <stdint.h>
#include "brw_reg.h"
#include "brw_defines.h"
#include "main/compiler.h"
#include "glsl/ir.h"
-#ifdef __cplusplus
-#include "brw_ir_allocator.h"
-#endif
-
-#pragma once
-
enum PACKED register_file {
BAD_FILE,
GRF,
@@ -149,52 +145,6 @@ enum instruction_scheduler_mode {
SCHEDULE_POST,
};
-class backend_visitor : public ir_visitor {
-protected:
-
- backend_visitor(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- gl_shader_stage stage);
-
-public:
-
- struct brw_context * const brw;
- struct gl_context * const ctx;
- struct brw_shader * const shader;
- struct gl_shader_program * const shader_prog;
- struct gl_program * const prog;
- struct brw_stage_prog_data * const stage_prog_data;
-
- /** ralloc context for temporary data used during compile */
- void *mem_ctx;
-
- /**
- * List of either fs_inst or vec4_instruction (inheriting from
- * backend_instruction)
- */
- exec_list instructions;
-
- cfg_t *cfg;
-
- gl_shader_stage stage;
-
- brw::simple_allocator alloc;
-
- virtual void dump_instruction(backend_instruction *inst) = 0;
- virtual void dump_instruction(backend_instruction *inst, FILE *file) = 0;
- virtual void dump_instructions();
- virtual void dump_instructions(const char *name);
-
- void calculate_cfg();
- void invalidate_cfg();
-
- void assign_common_binding_table_offsets(uint32_t next_binding_table_offset);
-
- virtual void invalidate_live_intervals() = 0;
-};
-
uint32_t brw_texture_offset(struct gl_context *ctx, ir_constant *offset);
#endif /* __cplusplus */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 51a2390e764..49c762aec45 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -169,6 +169,21 @@ dst_reg::dst_reg(const src_reg &reg)
}
bool
+dst_reg::equals(const dst_reg &r) const
+{
+ return (file == r.file &&
+ reg == r.reg &&
+ reg_offset == r.reg_offset &&
+ type == r.type &&
+ negate == r.negate &&
+ abs == r.abs &&
+ writemask == r.writemask &&
+ !reladdr && !r.reladdr &&
+ memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
+ sizeof(fixed_hw_reg)) == 0);
+}
+
+bool
vec4_instruction::is_send_from_grf()
{
switch (opcode) {
@@ -195,66 +210,6 @@ vec4_instruction::can_do_source_mods(struct brw_context *brw)
return true;
}
-/**
- * Returns how many MRFs an opcode will write over.
- *
- * Note that this is not the 0 or 1 implied writes in an actual gen
- * instruction -- the generate_* functions generate additional MOVs
- * for setup.
- */
-int
-vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
-{
- if (inst->mlen == 0)
- return 0;
-
- switch (inst->opcode) {
- case SHADER_OPCODE_RCP:
- case SHADER_OPCODE_RSQ:
- case SHADER_OPCODE_SQRT:
- case SHADER_OPCODE_EXP2:
- case SHADER_OPCODE_LOG2:
- case SHADER_OPCODE_SIN:
- case SHADER_OPCODE_COS:
- return 1;
- case SHADER_OPCODE_INT_QUOTIENT:
- case SHADER_OPCODE_INT_REMAINDER:
- case SHADER_OPCODE_POW:
- return 2;
- case VS_OPCODE_URB_WRITE:
- return 1;
- case VS_OPCODE_PULL_CONSTANT_LOAD:
- return 2;
- case SHADER_OPCODE_GEN4_SCRATCH_READ:
- return 2;
- case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- return 3;
- case GS_OPCODE_URB_WRITE:
- case GS_OPCODE_URB_WRITE_ALLOCATE:
- case GS_OPCODE_THREAD_END:
- return 0;
- case GS_OPCODE_FF_SYNC:
- return 1;
- case SHADER_OPCODE_SHADER_TIME_ADD:
- return 0;
- case SHADER_OPCODE_TEX:
- case SHADER_OPCODE_TXL:
- case SHADER_OPCODE_TXD:
- case SHADER_OPCODE_TXF:
- case SHADER_OPCODE_TXF_CMS:
- case SHADER_OPCODE_TXF_MCS:
- case SHADER_OPCODE_TXS:
- case SHADER_OPCODE_TG4:
- case SHADER_OPCODE_TG4_OFFSET:
- return inst->header_present ? 1 : 0;
- case SHADER_OPCODE_UNTYPED_ATOMIC:
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- return 0;
- default:
- unreachable("not reached");
- }
-}
-
bool
src_reg::equals(const src_reg &r) const
{
@@ -545,7 +500,7 @@ vec4_visitor::split_uniform_registers()
}
/* Update that everything is now vector-sized. */
- for (int i = 0; i < this->uniforms; i++) {
+ for (unsigned i = 0; i < this->uniforms; i++) {
this->uniform_size[i] = 1;
}
}
@@ -574,12 +529,12 @@ vec4_visitor::pack_uniform_registers()
}
}
- int new_uniform_count = 0;
+ unsigned new_uniform_count = 0;
/* Now, figure out a packing of the live uniform vectors into our
* push constants.
*/
- for (int src = 0; src < uniforms; src++) {
+ for (unsigned src = 0; src < uniforms; src++) {
assert(src < uniform_array_size);
int size = this->uniform_vector_size[src];
@@ -588,7 +543,7 @@ vec4_visitor::pack_uniform_registers()
continue;
}
- int dst;
+ unsigned dst;
/* Find the lowest place we can slot this uniform in. */
for (dst = 0; dst < src; dst++) {
if (this->uniform_vector_size[dst] + size <= 4)
@@ -725,7 +680,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
* If changing this value, note the limitation about total_regs in
* brw_curbe.c.
*/
- int max_uniform_components = 32 * 8;
+ unsigned max_uniform_components = 32 * 8;
if (this->uniforms * 4 <= max_uniform_components)
return;
@@ -734,7 +689,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
* look for the most infrequently used uniform vec4s, but leave
* that for later.
*/
- for (int i = 0; i < this->uniforms * 4; i += 4) {
+ for (unsigned i = 0; i < this->uniforms * 4; i += 4) {
pull_constant_loc[i / 4] = -1;
if (i >= max_uniform_components) {
@@ -778,12 +733,13 @@ vec4_visitor::move_push_constants_to_pull_constants()
pull_constant_loc[inst->src[i].reg] == -1)
continue;
- int uniform = inst->src[i].reg;
-
- dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+ vec4_builder ibld = bld.at(block, inst);
+ int loc = pull_constant_loc[inst->src[i].reg] + inst->src[i].reg_offset;
+ src_reg surf_index(prog_data->base.binding_table.pull_constants_start);
+ dst_reg temp = ibld.vector().natural_reg(BRW_REGISTER_TYPE_F);
- emit_pull_constant_load(block, inst, temp, inst->src[i],
- pull_constant_loc[uniform]);
+ emit_pull_constant_load(ibld, temp, surf_index, 16 * loc,
+ inst->src[i].reladdr, 4);
inst->src[i].file = temp.file;
inst->src[i].reg = temp.reg;
@@ -1578,97 +1534,6 @@ vec4_visitor::assign_binding_table_offsets()
assign_common_binding_table_offsets(0);
}
-src_reg
-vec4_visitor::get_timestamp()
-{
- assert(brw->gen >= 7);
-
- src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_TIMESTAMP,
- 0,
- BRW_REGISTER_TYPE_UD,
- BRW_VERTICAL_STRIDE_0,
- BRW_WIDTH_4,
- BRW_HORIZONTAL_STRIDE_4,
- BRW_SWIZZLE_XYZW,
- WRITEMASK_XYZW));
-
- dst_reg dst = dst_reg(this, glsl_type::uvec4_type);
-
- vec4_instruction *mov = emit(MOV(dst, ts));
- /* We want to read the 3 fields we care about (mostly field 0, but also 2)
- * even if it's not enabled in the dispatch.
- */
- mov->force_writemask_all = true;
-
- return src_reg(dst);
-}
-
-void
-vec4_visitor::emit_shader_time_begin()
-{
- current_annotation = "shader time start";
- shader_start_time = get_timestamp();
-}
-
-void
-vec4_visitor::emit_shader_time_end()
-{
- current_annotation = "shader time end";
- src_reg shader_end_time = get_timestamp();
-
-
- /* Check that there weren't any timestamp reset events (assuming these
- * were the only two timestamp reads that happened).
- */
- src_reg reset_end = shader_end_time;
- reset_end.swizzle = BRW_SWIZZLE_ZZZZ;
- vec4_instruction *test = emit(AND(dst_null_d(), reset_end, src_reg(1u)));
- test->conditional_mod = BRW_CONDITIONAL_Z;
-
- emit(IF(BRW_PREDICATE_NORMAL));
-
- /* Take the current timestamp and get the delta. */
- shader_start_time.negate = true;
- dst_reg diff = dst_reg(this, glsl_type::uint_type);
- emit(ADD(diff, shader_start_time, shader_end_time));
-
- /* If there were no instructions between the two timestamp gets, the diff
- * is 2 cycles. Remove that overhead, so I can forget about that when
- * trying to determine the time taken for single instructions.
- */
- emit(ADD(diff, src_reg(diff), src_reg(-2u)));
-
- emit_shader_time_write(st_base, src_reg(diff));
- emit_shader_time_write(st_written, src_reg(1u));
- emit(BRW_OPCODE_ELSE);
- emit_shader_time_write(st_reset, src_reg(1u));
- emit(BRW_OPCODE_ENDIF);
-}
-
-void
-vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
- src_reg value)
-{
- int shader_time_index =
- brw_get_shader_time_index(brw, shader_prog, prog, type);
-
- dst_reg dst =
- dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2));
-
- dst_reg offset = dst;
- dst_reg time = dst;
- time.reg_offset++;
-
- offset.type = BRW_REGISTER_TYPE_UD;
- emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE)));
-
- time.type = BRW_REGISTER_TYPE_UD;
- emit(MOV(time, src_reg(value)));
-
- emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst));
-}
-
bool
vec4_visitor::run()
{
@@ -1689,13 +1554,13 @@ vec4_visitor::run()
} else {
emit_program_code();
}
- base_ir = NULL;
if (key->userclip_active && !prog->UsesClipDistanceOut)
setup_uniform_clipplane_values();
emit_thread_end();
+ bld = bld.at(NULL, NULL);
calculate_cfg();
/* Before any optimization, push array accesses out to scratch
@@ -1731,7 +1596,7 @@ vec4_visitor::run()
snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \
stage_name, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
\
- backend_visitor::dump_instructions(filename); \
+ base_visitor::dump_instructions(filename); \
} \
\
progress = progress || this_progress; \
@@ -1743,7 +1608,7 @@ vec4_visitor::run()
snprintf(filename, 64, "%s-%04d-00-start",
stage_name, shader_prog ? shader_prog->Name : 0);
- backend_visitor::dump_instructions(filename);
+ base_visitor::dump_instructions(filename);
}
bool progress;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 39c65b7b8ed..ab71a5a13ad 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -25,13 +25,10 @@
#define BRW_VEC4_H
#include <stdint.h>
-#include "brw_shader.h"
#include "main/compiler.h"
-#include "program/hash_table.h"
#include "brw_program.h"
-
#ifdef __cplusplus
-#include "brw_ir_vec4.h"
+#include "brw_ir_visitor.h"
extern "C" {
#endif
@@ -93,7 +90,7 @@ namespace brw {
* Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
* fixed-function) into VS IR.
*/
-class vec4_visitor : public backend_visitor
+class vec4_visitor : public backend_visitor<vec4_visitor, vec4_builder>
{
public:
vec4_visitor(struct brw_context *brw,
@@ -106,53 +103,17 @@ public:
void *mem_ctx,
bool debug_flag,
bool no_spills,
- shader_time_shader_type st_base,
- shader_time_shader_type st_written,
- shader_time_shader_type st_reset);
- ~vec4_visitor();
-
- dst_reg dst_null_f()
- {
- return dst_reg(brw_null_reg());
- }
-
- dst_reg dst_null_d()
- {
- return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- }
-
- dst_reg dst_null_ud()
- {
- return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
- }
+ shader_time_shader_type st_type);
struct brw_vec4_compile * const c;
const struct brw_vec4_prog_key * const key;
struct brw_vec4_prog_data * const prog_data;
unsigned int sanity_param_count;
- char *fail_msg;
- bool failed;
-
- /**
- * GLSL IR currently being processed, which is associated with our
- * driver IR instructions for debugging purposes.
- */
- const void *base_ir;
- const char *current_annotation;
-
- int first_non_payload_grf;
- unsigned int max_grf;
- int *virtual_grf_start;
- int *virtual_grf_end;
dst_reg userplane[MAX_CLIP_PLANES];
bool live_intervals_valid;
- dst_reg *variable_storage(ir_variable *var);
-
- void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
-
bool need_all_constants_in_pull_buffer;
/**
@@ -164,48 +125,29 @@ public:
*/
/*@{*/
virtual void visit(ir_variable *);
- virtual void visit(ir_loop *);
- virtual void visit(ir_loop_jump *);
- virtual void visit(ir_function_signature *);
- virtual void visit(ir_function *);
- virtual void visit(ir_expression *);
- virtual void visit(ir_swizzle *);
- virtual void visit(ir_dereference_variable *);
- virtual void visit(ir_dereference_array *);
- virtual void visit(ir_dereference_record *);
- virtual void visit(ir_assignment *);
- virtual void visit(ir_constant *);
- virtual void visit(ir_call *);
- virtual void visit(ir_return *);
virtual void visit(ir_discard *);
- virtual void visit(ir_texture *);
- virtual void visit(ir_if *);
virtual void visit(ir_emit_vertex *);
virtual void visit(ir_end_primitive *);
/*@}*/
- src_reg result;
+ dst_reg
+ temporary_reg(const glsl_type *type)
+ {
+ const unsigned n = (type->is_array() || type->is_record() ?
+ 4 : type->vector_elements);
+ return resize(bld.natural_reg(brw_type_for_base_type(type),
+ type_size(type)), n);
+ }
/* Regs for vertex results. Generated at ir_variable visiting time
* for the ir->location's used.
*/
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
- int *uniform_size;
- int *uniform_vector_size;
- int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
- int uniforms;
-
- src_reg shader_start_time;
-
- struct hash_table *variable_ht;
bool run(void);
- void fail(const char *msg, ...);
void setup_uniform_clipplane_values();
- void setup_uniform_values(ir_variable *ir);
- void setup_builtin_uniform_values(ir_variable *ir);
int setup_uniforms(int payload_reg);
bool reg_allocate_trivial();
bool reg_allocate();
@@ -231,120 +173,21 @@ public:
void opt_set_dependency_control();
void opt_schedule_instructions();
- vec4_instruction *emit(vec4_instruction *inst);
-
- vec4_instruction *emit(enum opcode opcode);
-
- vec4_instruction *emit(enum opcode opcode, dst_reg dst);
-
- vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
-
- vec4_instruction *emit(enum opcode opcode, dst_reg dst,
- src_reg src0, src_reg src1);
-
- vec4_instruction *emit(enum opcode opcode, dst_reg dst,
- src_reg src0, src_reg src1, src_reg src2);
-
- vec4_instruction *emit_before(bblock_t *block,
- vec4_instruction *inst,
- vec4_instruction *new_inst);
-
- vec4_instruction *MOV(const dst_reg &dst, const src_reg &src0);
- vec4_instruction *NOT(const dst_reg &dst, const src_reg &src0);
- vec4_instruction *RNDD(const dst_reg &dst, const src_reg &src0);
- vec4_instruction *RNDE(const dst_reg &dst, const src_reg &src0);
- vec4_instruction *RNDZ(const dst_reg &dst, const src_reg &src0);
- vec4_instruction *FRC(const dst_reg &dst, const src_reg &src0);
- vec4_instruction *F32TO16(const dst_reg &dst, const src_reg &src0);
- vec4_instruction *F16TO32(const dst_reg &dst, const src_reg &src0);
- vec4_instruction *ADD(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *MUL(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *MACH(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *MAC(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *AND(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *OR(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *XOR(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *DP3(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *DP4(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *DPH(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *SHL(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *SHR(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *ASR(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
- enum brw_conditional_mod condition);
- vec4_instruction *IF(src_reg src0, src_reg src1,
- enum brw_conditional_mod condition);
- vec4_instruction *IF(enum brw_predicate predicate);
- vec4_instruction *PULL_CONSTANT_LOAD(const dst_reg &dst,
- const src_reg &index);
- vec4_instruction *SCRATCH_READ(const dst_reg &dst, const src_reg &index);
- vec4_instruction *SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
- const src_reg &index);
- vec4_instruction *LRP(const dst_reg &dst, const src_reg &a,
- const src_reg &y, const src_reg &x);
- vec4_instruction *BFREV(const dst_reg &dst, const src_reg &value);
- vec4_instruction *BFE(const dst_reg &dst, const src_reg &bits,
- const src_reg &offset, const src_reg &value);
- vec4_instruction *BFI1(const dst_reg &dst, const src_reg &bits,
- const src_reg &offset);
- vec4_instruction *BFI2(const dst_reg &dst, const src_reg &bfi1_dst,
- const src_reg &insert, const src_reg &base);
- vec4_instruction *FBH(const dst_reg &dst, const src_reg &value);
- vec4_instruction *FBL(const dst_reg &dst, const src_reg &value);
- vec4_instruction *CBIT(const dst_reg &dst, const src_reg &value);
- vec4_instruction *MAD(const dst_reg &dst, const src_reg &c,
- const src_reg &b, const src_reg &a);
- vec4_instruction *ADDC(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
- vec4_instruction *SUBB(const dst_reg &dst, const src_reg &src0,
- const src_reg &src1);
-
- int implied_mrf_writes(vec4_instruction *inst);
-
- bool try_rewrite_rhs_to_dst(ir_assignment *ir,
- dst_reg dst,
- src_reg src,
- vec4_instruction *pre_rhs_inst,
- vec4_instruction *last_rhs_inst);
-
- /** Walks an exec_list of ir_instruction and sends it through this visitor. */
- void visit_instructions(const exec_list *list);
+ instruction *SCRATCH_READ(vec4_builder &bld, const dst_reg &dst,
+ const src_reg &index);
+ instruction *SCRATCH_WRITE(vec4_builder &bld, const dst_reg &dst,
+ const src_reg &src, const src_reg &index);
+
+ void emit_pull_constant_load(vec4_builder &bld,
+ const dst_reg &dst,
+ const src_reg &surf_index,
+ uint32_t off,
+ const src_reg *reladdr,
+ unsigned num_components);
void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
src_reg src0, src_reg src1, src_reg one);
- void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
- void emit_if_gen6(ir_if *ir);
-
- void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
- src_reg src0, src_reg src1);
-
- void emit_lrp(const dst_reg &dst,
- const src_reg &x, const src_reg &y, const src_reg &a);
-
- void emit_block_move(dst_reg *dst, src_reg *src,
- const struct glsl_type *type, brw_predicate predicate);
-
- void emit_constant_values(dst_reg *dst, ir_constant *value);
-
- /**
- * Emit the correct dot-product instruction for the type of arguments
- */
- void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
-
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, src_reg src0);
@@ -354,69 +197,53 @@ public:
void emit_scs(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, const src_reg &src);
- src_reg fix_3src_operand(src_reg src);
-
- void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
- void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
- void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
- void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
- void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
- void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
- src_reg fix_math_operand(src_reg src);
-
void emit_pack_half_2x16(dst_reg dst, src_reg src0);
void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
- uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
- src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
- void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
- void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler);
-
void emit_ndc_computation();
void emit_psiz_and_flags(dst_reg reg);
void emit_clip_distances(dst_reg reg, int offset);
void emit_generic_urb_slot(dst_reg reg, int varying);
void emit_urb_slot(dst_reg reg, int varying);
- void emit_shader_time_begin();
- void emit_shader_time_end();
- void emit_shader_time_write(enum shader_time_shader_type type,
- src_reg value);
-
- void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
- dst_reg dst, src_reg offset, src_reg src0,
- src_reg src1);
-
- void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
- src_reg offset);
-
src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
src_reg *reladdr, int reg_offset);
- src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
- src_reg *reladdr, int reg_offset);
void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
int base_offset);
void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
int base_offset);
- void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
- dst_reg dst,
- src_reg orig_src,
- int base_offset);
- bool try_emit_mad(ir_expression *ir);
- bool try_emit_b2f_of_compare(ir_expression *ir);
- void resolve_ud_negate(src_reg *reg);
+ vec4_instruction *emit_texture(ir_texture *ir, const dst_reg &dst,
+ const src_reg &coordinate,
+ const src_reg &shadow_c,
+ const src_reg &lod, const src_reg &lod2,
+ const src_reg &offset_val,
+ const src_reg &sample_index,
+ const src_reg &mcs, const src_reg &sampler);
- src_reg get_timestamp();
+ src_reg emit_untyped_surface_header();
bool process_move_condition(ir_rvalue *ir);
void dump_instruction(backend_instruction *inst);
void dump_instruction(backend_instruction *inst, FILE *file);
- void visit_atomic_counter_intrinsic(ir_call *ir);
+ void try_replace_with_sel() {}
+
+ bool
+ emit_interpolate_expression(ir_expression *ir)
+ {
+ unreachable("not reached");
+ }
+
+ const struct brw_sampler_prog_key_data *
+ sampler_prog_key() const {
+ return &key->tex;
+ }
+
+ void no16(const char *msg, ...) {}
protected:
void emit_vertex();
@@ -432,7 +259,6 @@ protected:
virtual void emit_thread_end() = 0;
virtual void emit_urb_write_header(int mrf) = 0;
virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
- virtual int compute_array_stride(ir_dereference_array *ir);
const bool debug_flag;
@@ -441,10 +267,6 @@ private:
* If true, then register allocation should fail instead of spilling.
*/
const bool no_spills;
-
- const shader_time_shader_type st_base;
- const shader_time_shader_type st_written;
- const shader_time_shader_type st_reset;
};
@@ -537,11 +359,13 @@ private:
void generate_untyped_atomic(vec4_instruction *inst,
struct brw_reg dst,
+ struct brw_reg payload,
struct brw_reg atomic_op,
struct brw_reg surf_index);
void generate_untyped_surface_read(vec4_instruction *inst,
struct brw_reg dst,
+ struct brw_reg payload,
struct brw_reg surf_index);
struct brw_context *brw;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index b0a5c0a65e9..3ccac54e436 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -163,18 +163,17 @@ vec4_visitor::opt_cse_local(bblock_t *block)
entry->tmp = src_reg(this, glsl_type::float_type);
entry->tmp.type = inst->dst.type;
entry->tmp.swizzle = BRW_SWIZZLE_XYZW;
-
- vec4_instruction *copy = MOV(entry->generator->dst, entry->tmp);
- entry->generator->insert_after(block, copy);
+ bld.at(block, (vec4_instruction *)entry->generator->next)
+ .MOV(entry->generator->dst, entry->tmp);
entry->generator->dst = dst_reg(entry->tmp);
}
/* dest <- temp */
if (!inst->dst.is_null()) {
assert(inst->dst.type == entry->tmp.type);
- vec4_instruction *copy = MOV(inst->dst, entry->tmp);
+ vec4_instruction *copy =
+ bld.at(block, inst).MOV(inst->dst, entry->tmp);
copy->force_writemask_all = inst->force_writemask_all;
- inst->insert_before(block, copy);
}
/* Set our iterator so that next time through the loop inst->next
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index e5225673812..308a2114212 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1098,6 +1098,7 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
void
vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
struct brw_reg dst,
+ struct brw_reg payload,
struct brw_reg atomic_op,
struct brw_reg surf_index)
{
@@ -1106,8 +1107,7 @@ vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
surf_index.file == BRW_IMMEDIATE_VALUE &&
surf_index.type == BRW_REGISTER_TYPE_UD);
- brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf),
- atomic_op.dw1.ud, surf_index.dw1.ud,
+ brw_untyped_atomic(p, dst, payload, atomic_op.dw1.ud, surf_index.dw1.ud,
inst->mlen, 1);
brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
@@ -1116,13 +1116,13 @@ vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
void
vec4_generator::generate_untyped_surface_read(vec4_instruction *inst,
struct brw_reg dst,
+ struct brw_reg payload,
struct brw_reg surf_index)
{
assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
surf_index.type == BRW_REGISTER_TYPE_UD);
- brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf),
- surf_index.dw1.ud,
+ brw_untyped_surface_read(p, dst, payload, surf_index.dw1.ud,
inst->mlen, 1);
brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
@@ -1479,11 +1479,11 @@ vec4_generator::generate_code(const cfg_t *cfg)
break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
- generate_untyped_atomic(inst, dst, src[0], src[1]);
+ generate_untyped_atomic(inst, dst, src[0], src[1], src[2]);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- generate_untyped_surface_read(inst, dst, src[0]);
+ generate_untyped_surface_read(inst, dst, src[0], src[1]);
break;
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index c569e0aa4ca..ce3ed7f65b3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -41,8 +41,7 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw,
bool no_spills)
: vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base,
&c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
- INTEL_DEBUG & DEBUG_GS, no_spills,
- ST_GS, ST_GS_WRITTEN, ST_GS_RESET),
+ INTEL_DEBUG & DEBUG_GS, no_spills, ST_GS),
c(c)
{
}
@@ -55,8 +54,8 @@ vec4_gs_visitor::make_reg_for_system_value(ir_variable *ir)
switch (ir->data.location) {
case SYSTEM_VALUE_INVOCATION_ID:
- this->current_annotation = "initialize gl_InvocationID";
- emit(GS_OPCODE_GET_INSTANCE_ID, *reg);
+ bld.set_annotation("initialize gl_InvocationID");
+ bld.emit(GS_OPCODE_GET_INSTANCE_ID, *reg);
break;
default:
unreachable("not reached");
@@ -148,17 +147,17 @@ vec4_gs_visitor::emit_prolog()
* reads/writes to garbage memory). So just set it to zero at the top of
* the shader.
*/
- this->current_annotation = "clear r0.2";
+ bld.set_annotation("clear r0.2");
dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
- vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, 0u);
+ vec4_instruction *inst = bld.emit(GS_OPCODE_SET_DWORD_2, r0, 0u);
inst->force_writemask_all = true;
/* Create a virtual register to hold the vertex count */
this->vertex_count = src_reg(this, glsl_type::uint_type);
/* Initialize the vertex_count register to 0 */
- this->current_annotation = "initialize vertex_count";
- inst = emit(MOV(dst_reg(this->vertex_count), 0u));
+ bld.set_annotation("initialize vertex_count");
+ inst = bld.MOV(dst_reg(this->vertex_count), 0u);
inst->force_writemask_all = true;
if (c->control_data_header_size_bits > 0) {
@@ -172,8 +171,8 @@ vec4_gs_visitor::emit_prolog()
* Otherwise, we need to initialize it to 0 here.
*/
if (c->control_data_header_size_bits <= 32) {
- this->current_annotation = "initialize control data bits";
- inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+ bld.set_annotation("initialize control data bits");
+ inst = bld.MOV(dst_reg(this->control_data_bits), 0u);
inst->force_writemask_all = true;
}
}
@@ -183,7 +182,7 @@ vec4_gs_visitor::emit_prolog()
* component of VARYING_SLOT_PSIZ.
*/
if (c->gp->program.Base.InputsRead & VARYING_BIT_PSIZ) {
- this->current_annotation = "swizzle gl_PointSize input";
+ bld.set_annotation("swizzle gl_PointSize input");
for (int vertex = 0; vertex < c->gp->program.VerticesIn; vertex++) {
dst_reg dst(ATTR,
BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ);
@@ -191,7 +190,7 @@ vec4_gs_visitor::emit_prolog()
src_reg src(dst);
dst.writemask = WRITEMASK_X;
src.swizzle = BRW_SWIZZLE_WWWW;
- inst = emit(MOV(dst, src));
+ inst = bld.MOV(dst, src);
/* In dual instanced dispatch mode, dst has a width of 4, so we need
* to make sure the MOV happens regardless of which channels are
@@ -201,7 +200,7 @@ vec4_gs_visitor::emit_prolog()
}
}
- this->current_annotation = NULL;
+ bld.set_annotation(NULL);
}
@@ -222,7 +221,7 @@ vec4_gs_visitor::emit_thread_end()
* corresponding to the most recently output vertex still need to be
* emitted.
*/
- current_annotation = "thread end: emit control data bits";
+ bld.set_annotation("thread end: emit control data bits");
emit_control_data_bits();
}
@@ -231,15 +230,15 @@ vec4_gs_visitor::emit_thread_end()
*/
int base_mrf = 1;
- current_annotation = "thread end";
+ bld.set_annotation("thread end");
dst_reg mrf_reg(MRF, base_mrf);
src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
- vec4_instruction *inst = emit(MOV(mrf_reg, r0));
+ vec4_instruction *inst = bld.MOV(mrf_reg, r0);
inst->force_writemask_all = true;
- emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
+ bld.emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
- inst = emit(GS_OPCODE_THREAD_END);
+ inst = bld.emit(GS_OPCODE_THREAD_END);
inst->base_mrf = base_mrf;
inst->mlen = 1;
}
@@ -258,10 +257,10 @@ vec4_gs_visitor::emit_urb_write_header(int mrf)
*/
dst_reg mrf_reg(MRF, mrf);
src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
- this->current_annotation = "URB write header";
- vec4_instruction *inst = emit(MOV(mrf_reg, r0));
+ bld.set_annotation("URB write header");
+ vec4_instruction *inst = bld.MOV(mrf_reg, r0);
inst->force_writemask_all = true;
- emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
+ bld.emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
(uint32_t) c->prog_data.output_vertex_size_hwords);
}
@@ -275,7 +274,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
*/
(void) complete;
- vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
+ vec4_instruction *inst = bld.emit(GS_OPCODE_URB_WRITE);
inst->offset = c->prog_data.control_data_header_size_hwords;
/* We need to increment Global Offset by 1 to make room for Broadwell's
@@ -288,9 +287,8 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
return inst;
}
-
-int
-vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir)
+unsigned
+vec4_gs_visitor::get_array_stride(ir_dereference_array *ir)
{
/* Geometry shader inputs are arrays, but they use an unusual array layout:
* instead of all array elements for a given geometry shader input being
@@ -303,7 +301,7 @@ vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir)
if (deref_var && deref_var->var->data.mode == ir_var_shader_in)
return BRW_VARYING_SLOT_COUNT;
else
- return vec4_visitor::compute_array_stride(ir);
+ return backend_visitor::get_array_stride(ir);
}
@@ -349,8 +347,8 @@ vec4_gs_visitor::emit_control_data_bits()
/* If vertex_count is 0, then no control data bits have been accumulated
* yet, so we should do nothing.
*/
- emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ));
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.CMP(bld.reg_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ);
+ bld.IF(BRW_PREDICATE_NORMAL);
{
/* If we are using either channel masks or a per-slot offset, then we
* need to figure out which DWORD we are trying to write to, using the
@@ -366,11 +364,11 @@ vec4_gs_visitor::emit_control_data_bits()
src_reg dword_index(this, glsl_type::uint_type);
if (urb_write_flags) {
src_reg prev_count(this, glsl_type::uint_type);
- emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+ bld.ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu);
unsigned log2_bits_per_vertex =
_mesa_fls(c->control_data_bits_per_vertex);
- emit(SHR(dst_reg(dword_index), prev_count,
- (uint32_t) (6 - log2_bits_per_vertex)));
+ bld.SHR(dst_reg(dword_index), prev_count,
+ (uint32_t) (6 - log2_bits_per_vertex));
}
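
Illustration (a host-side sketch, not patch code; vertex_count refers to the
value above, and bits_per_vertex is assumed to be a power of two):

   unsigned bpv         = c->control_data_bits_per_vertex;
   unsigned bit_offset  = (vertex_count - 1) * bpv;  /* bits accumulated   */
   unsigned dword_index = bit_offset / 32;           /* DWORD holding them */
   /* Equivalent to prev_count >> (6 - _mesa_fls(bpv)), because
    * _mesa_fls(bpv) == log2(bpv) + 1 for powers of two. */
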
/* Start building the URB write message. The first MRF gets a copy of
@@ -379,7 +377,7 @@ vec4_gs_visitor::emit_control_data_bits()
int base_mrf = 1;
dst_reg mrf_reg(MRF, base_mrf);
src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
- vec4_instruction *inst = emit(MOV(mrf_reg, r0));
+ vec4_instruction *inst = bld.MOV(mrf_reg, r0);
inst->force_writemask_all = true;
if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
@@ -387,8 +385,8 @@ vec4_gs_visitor::emit_control_data_bits()
* the appropriate OWORD within the control data header.
*/
src_reg per_slot_offset(this, glsl_type::uint_type);
- emit(SHR(dst_reg(per_slot_offset), dword_index, 2u));
- emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
+ bld.SHR(dst_reg(per_slot_offset), dword_index, 2u);
+ bld.emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
}
if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
@@ -400,24 +398,24 @@ vec4_gs_visitor::emit_control_data_bits()
* together.
*/
src_reg channel(this, glsl_type::uint_type);
- inst = emit(AND(dst_reg(channel), dword_index, 3u));
+ inst = bld.AND(dst_reg(channel), dword_index, 3u);
inst->force_writemask_all = true;
src_reg one(this, glsl_type::uint_type);
- inst = emit(MOV(dst_reg(one), 1u));
+ inst = bld.MOV(dst_reg(one), 1u);
inst->force_writemask_all = true;
src_reg channel_mask(this, glsl_type::uint_type);
- inst = emit(SHL(dst_reg(channel_mask), one, channel));
+ inst = bld.SHL(dst_reg(channel_mask), one, channel);
inst->force_writemask_all = true;
- emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
+ bld.emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
channel_mask);
- emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
+ bld.emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
}
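
For illustration, the header addressing above reduces to the following
arithmetic (a sketch assuming the control data header is addressed in
OWORDs of four DWORDs each):

   unsigned per_slot_offset = dword_index >> 2;  /* OWORD within header    */
   unsigned channel         = dword_index & 3;   /* DWORD within OWORD     */
   unsigned channel_mask    = 1u << channel;     /* enable just that DWORD */
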
/* Store the control data bits in the message payload and send it. */
dst_reg mrf_reg2(MRF, base_mrf + 1);
- inst = emit(MOV(mrf_reg2, this->control_data_bits));
+ inst = bld.MOV(mrf_reg2, this->control_data_bits);
inst->force_writemask_all = true;
- inst = emit(GS_OPCODE_URB_WRITE);
+ inst = bld.emit(GS_OPCODE_URB_WRITE);
inst->urb_write_flags = urb_write_flags;
/* We need to increment Global Offset by 256-bits to make room for
* Broadwell's extra "Vertex Count" payload at the beginning of the
@@ -429,7 +427,7 @@ vec4_gs_visitor::emit_control_data_bits()
inst->base_mrf = base_mrf;
inst->mlen = 2;
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
}
void
@@ -455,11 +453,11 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id)
/* reg::sid = stream_id */
src_reg sid(this, glsl_type::uint_type);
- emit(MOV(dst_reg(sid), stream_id));
+ bld.MOV(dst_reg(sid), stream_id);
/* reg::shift_count = 2 * (vertex_count - 1) */
src_reg shift_count(this, glsl_type::uint_type);
- emit(SHL(dst_reg(shift_count), this->vertex_count, 1u));
+ bld.SHL(dst_reg(shift_count), this->vertex_count, 1u);
/* Note: we're relying on the fact that the GEN SHL instruction only pays
* attention to the lower 5 bits of its second source argument, so on this
@@ -467,23 +465,23 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id)
* stream_id << ((2 * (vertex_count - 1)) % 32).
*/
src_reg mask(this, glsl_type::uint_type);
- emit(SHL(dst_reg(mask), sid, shift_count));
- emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
+ bld.SHL(dst_reg(mask), sid, shift_count);
+ bld.OR(dst_reg(this->control_data_bits), this->control_data_bits, mask);
}
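
A host-side sketch of the mask computed above (illustration only, with the
hardware SHL's implicit modulo written out):

   static uint32_t
   stream_control_data_mask(uint32_t stream_id, uint32_t vertex_count)
   {
      const uint32_t shift_count = 2 * (vertex_count - 1);
      /* GEN SHL reads only the low 5 bits of its shift source, so the
       * hardware computes stream_id << (shift_count % 32) for free. */
      return stream_id << (shift_count % 32);
   }
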
void
vec4_gs_visitor::visit(ir_emit_vertex *ir)
{
- this->current_annotation = "emit vertex: safety check";
+ bld.set_annotation("emit vertex: safety check");
/* To ensure that we don't output more vertices than the shader specified
* using max_vertices, do the logic inside a conditional of the form "if
* (vertex_count < MAX)"
*/
unsigned num_output_vertices = c->gp->program.VerticesOut;
- emit(CMP(dst_null_d(), this->vertex_count,
- src_reg(num_output_vertices), BRW_CONDITIONAL_L));
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.CMP(bld.reg_null_d(), this->vertex_count,
+ src_reg(num_output_vertices), BRW_CONDITIONAL_L);
+ bld.IF(BRW_PREDICATE_NORMAL);
{
/* If we're outputting 32 control data bits or less, then we can wait
* until the shader is over to output them all. Otherwise we need to
@@ -493,7 +491,7 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
* correct.
*/
if (c->control_data_header_size_bits > 32) {
- this->current_annotation = "emit vertex: emit control data bits";
+ bld.set_annotation("emit vertex: emit control data bits");
/* Only emit control data bits if we've finished accumulating a batch
* of 32 bits. This is the case when:
*
@@ -513,10 +511,10 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
* vertex_count & (32 / bits_per_vertex - 1) == 0
*/
vec4_instruction *inst =
- emit(AND(dst_null_d(), this->vertex_count,
- (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
+ bld.AND(bld.reg_null_d(), this->vertex_count,
+ (uint32_t) (32 / c->control_data_bits_per_vertex - 1));
inst->conditional_mod = BRW_CONDITIONAL_Z;
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.IF(BRW_PREDICATE_NORMAL);
{
emit_control_data_bits();
@@ -527,13 +525,13 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
* effect of any call to EndPrimitive() that the shader may have
* made before outputting its first vertex.
*/
- inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+ inst = bld.MOV(dst_reg(this->control_data_bits), 0u);
inst->force_writemask_all = true;
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
}
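
A worked instance of the batch test above (illustrative numbers only):

   unsigned bpv        = 2;                  /* control_data_bits_per_vertex */
   unsigned batch_mask = 32 / bpv - 1;       /* == 15: 16 vertices per batch */
   /* (vertex_count & batch_mask) == 0 at 16, 32, 48, ... -- exactly when a
    * full 32-bit batch of control data bits is ready to be flushed. */
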
- this->current_annotation = "emit vertex: vertex data";
+ bld.set_annotation("emit vertex: vertex data");
emit_vertex();
/* In stream mode we have to set control data bits for all vertices
@@ -543,17 +541,17 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
if (c->control_data_header_size_bits > 0 &&
c->prog_data.control_data_format ==
GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
- this->current_annotation = "emit vertex: Stream control data bits";
+ bld.set_annotation("emit vertex: Stream control data bits");
set_stream_control_data_bits(ir->stream_id());
}
- this->current_annotation = "emit vertex: increment vertex count";
- emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
- src_reg(1u)));
+ bld.set_annotation("emit vertex: increment vertex count");
+ bld.ADD(dst_reg(this->vertex_count), this->vertex_count,
+ src_reg(1u));
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
- this->current_annotation = NULL;
+ bld.set_annotation(NULL);
}
void
@@ -594,17 +592,17 @@ vec4_gs_visitor::visit(ir_end_primitive *)
/* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
src_reg one(this, glsl_type::uint_type);
- emit(MOV(dst_reg(one), 1u));
+ bld.MOV(dst_reg(one), 1u);
src_reg prev_count(this, glsl_type::uint_type);
- emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+ bld.ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu);
src_reg mask(this, glsl_type::uint_type);
/* Note: we're relying on the fact that the GEN SHL instruction only pays
* attention to the lower 5 bits of its second source argument, so on this
* architecture, 1 << (vertex_count - 1) is equivalent to 1 <<
* ((vertex_count - 1) % 32).
*/
- emit(SHL(dst_reg(mask), one, prev_count));
- emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
+ bld.SHL(dst_reg(mask), one, prev_count);
+ bld.OR(dst_reg(this->control_data_bits), this->control_data_bits, mask);
}
static const unsigned *
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index 8bf11facb0b..1c8e7ad876c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -91,7 +91,7 @@ protected:
virtual void emit_thread_end();
virtual void emit_urb_write_header(int mrf);
virtual vec4_instruction *emit_urb_write_opcode(bool complete);
- virtual int compute_array_stride(ir_dereference_array *ir);
+ virtual unsigned get_array_stride(ir_dereference_array *ir);
virtual void visit(ir_emit_vertex *);
virtual void visit(ir_end_primitive *);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index de04d881d8b..25a48fd5f4e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -23,7 +23,6 @@
#include "brw_vec4.h"
#include "brw_cfg.h"
-#include "glsl/ir_uniform.h"
extern "C" {
#include "program/sampler.h"
}
@@ -62,198 +61,11 @@ vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
}
vec4_instruction *
-vec4_visitor::emit(vec4_instruction *inst)
+vec4_visitor::SCRATCH_READ(vec4_builder &bld, const dst_reg &dst,
+ const src_reg &index)
{
- inst->ir = this->base_ir;
- inst->annotation = this->current_annotation;
-
- this->instructions.push_tail(inst);
-
- return inst;
-}
-
-vec4_instruction *
-vec4_visitor::emit_before(bblock_t *block, vec4_instruction *inst,
- vec4_instruction *new_inst)
-{
- new_inst->ir = inst->ir;
- new_inst->annotation = inst->annotation;
-
- inst->insert_before(block, new_inst);
-
- return inst;
-}
-
-vec4_instruction *
-vec4_visitor::emit(enum opcode opcode, dst_reg dst,
- src_reg src0, src_reg src1, src_reg src2)
-{
- return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1, src2));
-}
-
-
-vec4_instruction *
-vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
-{
- return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1));
-}
-
-vec4_instruction *
-vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
-{
- return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0));
-}
-
-vec4_instruction *
-vec4_visitor::emit(enum opcode opcode, dst_reg dst)
-{
- return emit(new(mem_ctx) vec4_instruction(opcode, dst));
-}
-
-vec4_instruction *
-vec4_visitor::emit(enum opcode opcode)
-{
- return emit(new(mem_ctx) vec4_instruction(opcode, dst_reg()));
-}
-
-#define ALU1(op) \
- vec4_instruction * \
- vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \
- { \
- return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, src0); \
- }
-
-#define ALU2(op) \
- vec4_instruction * \
- vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
- const src_reg &src1) \
- { \
- return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
- src0, src1); \
- }
-
-#define ALU2_ACC(op) \
- vec4_instruction * \
- vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
- const src_reg &src1) \
- { \
- vec4_instruction *inst = new(mem_ctx) vec4_instruction( \
- BRW_OPCODE_##op, dst, src0, src1); \
- inst->writes_accumulator = true; \
- return inst; \
- }
-
-#define ALU3(op) \
- vec4_instruction * \
- vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
- const src_reg &src1, const src_reg &src2) \
- { \
- assert(brw->gen >= 6); \
- return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
- src0, src1, src2); \
- }
-
-ALU1(NOT)
-ALU1(MOV)
-ALU1(FRC)
-ALU1(RNDD)
-ALU1(RNDE)
-ALU1(RNDZ)
-ALU1(F32TO16)
-ALU1(F16TO32)
-ALU2(ADD)
-ALU2(MUL)
-ALU2_ACC(MACH)
-ALU2(AND)
-ALU2(OR)
-ALU2(XOR)
-ALU2(DP3)
-ALU2(DP4)
-ALU2(DPH)
-ALU2(SHL)
-ALU2(SHR)
-ALU2(ASR)
-ALU3(LRP)
-ALU1(BFREV)
-ALU3(BFE)
-ALU2(BFI1)
-ALU3(BFI2)
-ALU1(FBH)
-ALU1(FBL)
-ALU1(CBIT)
-ALU3(MAD)
-ALU2_ACC(ADDC)
-ALU2_ACC(SUBB)
-ALU2(MAC)
-
-/** Gen4 predicated IF. */
-vec4_instruction *
-vec4_visitor::IF(enum brw_predicate predicate)
-{
- vec4_instruction *inst;
-
- inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF);
- inst->predicate = predicate;
-
- return inst;
-}
-
-/** Gen6 IF with embedded comparison. */
-vec4_instruction *
-vec4_visitor::IF(src_reg src0, src_reg src1,
- enum brw_conditional_mod condition)
-{
- assert(brw->gen == 6);
-
- vec4_instruction *inst;
-
- resolve_ud_negate(&src0);
- resolve_ud_negate(&src1);
-
- inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF, dst_null_d(),
- src0, src1);
- inst->conditional_mod = condition;
-
- return inst;
-}
-
-/**
- * CMP: Sets the low bit of the destination channels with the result
- * of the comparison, while the upper bits are undefined, and updates
- * the flag register with the packed 16 bits of the result.
- */
-vec4_instruction *
-vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
- enum brw_conditional_mod condition)
-{
- vec4_instruction *inst;
-
- /* original gen4 does type conversion to the destination type
- * before comparison, producing garbage results for floating
- * point comparisons.
- */
- if (brw->gen == 4) {
- dst.type = src0.type;
- if (dst.file == HW_REG)
- dst.fixed_hw_reg.type = dst.type;
- }
-
- resolve_ud_negate(&src0);
- resolve_ud_negate(&src1);
-
- inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst, src0, src1);
- inst->conditional_mod = condition;
-
- return inst;
-}
-
-vec4_instruction *
-vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
-{
- vec4_instruction *inst;
-
- inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
- dst, index);
+ instruction *inst = bld.emit(SHADER_OPCODE_GEN4_SCRATCH_READ,
+ dst, index);
inst->base_mrf = 14;
inst->mlen = 2;
@@ -261,13 +73,11 @@ vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
}
vec4_instruction *
-vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
- const src_reg &index)
+vec4_visitor::SCRATCH_WRITE(vec4_builder &bld, const dst_reg &dst,
+ const src_reg &src, const src_reg &index)
{
- vec4_instruction *inst;
-
- inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
- dst, src, index);
+ instruction *inst = bld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
+ dst, src, index);
inst->base_mrf = 13;
inst->mlen = 3;
@@ -275,167 +85,48 @@ vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
}
void
-vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
+vec4_visitor::emit_pull_constant_load(vec4_builder &bld,
+ const dst_reg &dst,
+ const src_reg &surf_index,
+ uint32_t off,
+ const src_reg *reladdr,
+ unsigned num_components)
{
- static enum opcode dot_opcodes[] = {
- BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
- };
-
- emit(dot_opcodes[elements - 2], dst, src0, src1);
-}
-
-src_reg
-vec4_visitor::fix_3src_operand(src_reg src)
-{
- /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
- * able to use vertical stride of zero to replicate the vec4 uniform, like
- *
- * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
- *
- * But you can't, since vertical stride is always four in three-source
- * instructions. Instead, insert a MOV instruction to do the replication so
- * that the three-source instruction can consume it.
- */
-
- /* The MOV is only needed if the source is a uniform or immediate. */
- if (src.file != UNIFORM && src.file != IMM)
- return src;
-
- if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
- return src;
-
- dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
- expanded.type = src.type;
- emit(MOV(expanded, src));
- return src_reg(expanded);
-}
-
-src_reg
-vec4_visitor::fix_math_operand(src_reg src)
-{
- /* The gen6 math instruction ignores the source modifiers --
- * swizzle, abs, negate, and at least some parts of the register
- * region description.
- *
- * Rather than trying to enumerate all these cases, *always* expand the
- * operand to a temp GRF for gen6.
- *
- * For gen7, keep the operand as-is, except if immediate, which gen7 still
- * can't use.
+ /* Pre-gen6, the message header uses byte offsets instead of vec4
+ * (16-byte) offset units.
*/
+ const unsigned scale = (brw->gen >= 6 ? 16 : 1);
+ src_reg result(bld.vector(num_components).natural_reg(dst.type));
+ src_reg addr;
- if (brw->gen == 7 && src.file != IMM)
- return src;
-
- dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
- expanded.type = src.type;
- emit(MOV(expanded, src));
- return src_reg(expanded);
-}
-
-void
-vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
-{
- src = fix_math_operand(src);
-
- if (brw->gen == 6 && dst.writemask != WRITEMASK_XYZW) {
- /* The gen6 math instruction must be align1, so we can't do
- * writemasks.
- */
- dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
-
- emit(opcode, temp_dst, src);
-
- emit(MOV(dst, src_reg(temp_dst)));
+ if (reladdr) {
+ addr = src_reg(bld.scalar_reg(reladdr->type));
+ bld.ADD(dst_reg(addr), *reladdr, src_reg(off / 16));
+ if (scale == 1)
+ bld.SHL(dst_reg(addr), addr, src_reg(4));
} else {
- emit(opcode, dst, src);
+ addr = src_reg((off & ~0xf) / scale);
}
-}
-void
-vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
-{
- vec4_instruction *inst = emit(opcode, dst, src);
- inst->base_mrf = 1;
- inst->mlen = 1;
-}
-
-void
-vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
-{
- switch (opcode) {
- case SHADER_OPCODE_RCP:
- case SHADER_OPCODE_RSQ:
- case SHADER_OPCODE_SQRT:
- case SHADER_OPCODE_EXP2:
- case SHADER_OPCODE_LOG2:
- case SHADER_OPCODE_SIN:
- case SHADER_OPCODE_COS:
- break;
- default:
- unreachable("not reached: bad math opcode");
- }
-
- if (brw->gen >= 8) {
- emit(opcode, dst, src);
- } else if (brw->gen >= 6) {
- emit_math1_gen6(opcode, dst, src);
- } else {
- emit_math1_gen4(opcode, dst, src);
- }
-}
-
-void
-vec4_visitor::emit_math2_gen6(enum opcode opcode,
- dst_reg dst, src_reg src0, src_reg src1)
-{
- src0 = fix_math_operand(src0);
- src1 = fix_math_operand(src1);
-
- if (brw->gen == 6 && dst.writemask != WRITEMASK_XYZW) {
- /* The gen6 math instruction must be align1, so we can't do
- * writemasks.
- */
- dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
- temp_dst.type = dst.type;
-
- emit(opcode, temp_dst, src0, src1);
+ if (brw->gen >= 7) {
+ if (addr.file == IMM) {
+ dst_reg tmp = bld.scalar_reg(addr.type);
+ bld.MOV(tmp, addr);
+ addr = src_reg(tmp);
+ }
- emit(MOV(dst, src_reg(temp_dst)));
+ bld.emit(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, dst_reg(result),
+ surf_index, addr);
} else {
- emit(opcode, dst, src0, src1);
- }
-}
-
-void
-vec4_visitor::emit_math2_gen4(enum opcode opcode,
- dst_reg dst, src_reg src0, src_reg src1)
-{
- vec4_instruction *inst = emit(opcode, dst, src0, src1);
- inst->base_mrf = 1;
- inst->mlen = 2;
-}
-
-void
-vec4_visitor::emit_math(enum opcode opcode,
- dst_reg dst, src_reg src0, src_reg src1)
-{
- switch (opcode) {
- case SHADER_OPCODE_POW:
- case SHADER_OPCODE_INT_QUOTIENT:
- case SHADER_OPCODE_INT_REMAINDER:
- break;
- default:
- unreachable("not reached: unsupported binary math opcode");
+ vec4_instruction *pull = bld.emit(VS_OPCODE_PULL_CONSTANT_LOAD,
+ dst_reg(result), surf_index, addr);
+ pull->base_mrf = 14;
+ pull->mlen = 1;
}
- if (brw->gen >= 8) {
- emit(opcode, dst, src0, src1);
- } else if (brw->gen >= 6) {
- emit_math2_gen6(opcode, dst, src0, src1);
- } else {
- emit_math2_gen4(opcode, dst, src0, src1);
- }
+ result.swizzle += BRW_SWIZZLE4(off % 16 / 4, off % 16 / 4,
+ off % 16 / 4, off % 16 / 4);
+ bld.MOV(dst, result);
}
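
To illustrate the final swizzle fix-up: the component selected out of the
fetched vec4 follows from plain byte arithmetic (a sketch; off is the byte
offset passed in above):

   unsigned component = (off % 16) / 4;   /* 0 = x, 1 = y, 2 = z, 3 = w */
   /* Biasing result.swizzle by BRW_SWIZZLE4(component, ...) makes every
    * enabled channel of the trailing MOV read that one component. */
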
void
@@ -486,7 +177,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
* You should inspect the disasm output in order to verify that the MOV is
* not optimized away.
*/
- emit(MOV(tmp_dst, src_reg(0x12345678u)));
+ bld.MOV(tmp_dst, src_reg(0x12345678u));
#endif
/* Give tmp the form below, where "." means untouched.
@@ -499,20 +190,20 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
* relies on the undocumented hardware behavior mentioned above.
*/
tmp_dst.writemask = WRITEMASK_XY;
- emit(F32TO16(tmp_dst, src0));
+ bld.F32TO16(tmp_dst, src0);
/* Give the write-channels of dst the form:
* 0xhhhh0000
*/
tmp_src.swizzle = BRW_SWIZZLE_YYYY;
- emit(SHL(dst, tmp_src, src_reg(16u)));
+ bld.SHL(dst, tmp_src, src_reg(16u));
/* Finally, give the write-channels of dst the form of packHalf2x16's
* output:
* 0xhhhhllll
*/
tmp_src.swizzle = BRW_SWIZZLE_XXXX;
- emit(OR(dst, src_reg(dst), tmp_src));
+ bld.OR(dst, src_reg(dst), tmp_src);
}
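
A CPU-side reference for the packing sequence above (a sketch; f32_to_f16
is a hypothetical float-to-half helper, not a Mesa function):

   static uint32_t
   pack_half_2x16_ref(float x, float y)
   {
      /* high halfword from .y (the SHL by 16), low halfword from .x (the OR) */
      return ((uint32_t)f32_to_f16(y) << 16) | f32_to_f16(x);
   }
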
void
@@ -544,70 +235,13 @@ vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
src_reg tmp_src(tmp_dst);
tmp_dst.writemask = WRITEMASK_X;
- emit(AND(tmp_dst, src0, src_reg(0xffffu)));
+ bld.AND(tmp_dst, src0, src_reg(0xffffu));
tmp_dst.writemask = WRITEMASK_Y;
- emit(SHR(tmp_dst, src0, src_reg(16u)));
+ bld.SHR(tmp_dst, src0, src_reg(16u));
dst.writemask = WRITEMASK_XY;
- emit(F16TO32(dst, tmp_src));
-}
-
-void
-vec4_visitor::visit_instructions(const exec_list *list)
-{
- foreach_in_list(ir_instruction, ir, list) {
- base_ir = ir;
- ir->accept(this);
- }
-}
-
-
-static int
-type_size(const struct glsl_type *type)
-{
- unsigned int i;
- int size;
-
- switch (type->base_type) {
- case GLSL_TYPE_UINT:
- case GLSL_TYPE_INT:
- case GLSL_TYPE_FLOAT:
- case GLSL_TYPE_BOOL:
- if (type->is_matrix()) {
- return type->matrix_columns;
- } else {
- /* Regardless of size of vector, it gets a vec4. This is bad
- * packing for things like floats, but otherwise arrays become a
- * mess. Hopefully a later pass over the code can pack scalars
- * down if appropriate.
- */
- return 1;
- }
- case GLSL_TYPE_ARRAY:
- assert(type->length > 0);
- return type_size(type->fields.array) * type->length;
- case GLSL_TYPE_STRUCT:
- size = 0;
- for (i = 0; i < type->length; i++) {
- size += type_size(type->fields.structure[i].type);
- }
- return size;
- case GLSL_TYPE_SAMPLER:
- /* Samplers take up no register space, since they're baked in at
- * link time.
- */
- return 0;
- case GLSL_TYPE_ATOMIC_UINT:
- return 0;
- case GLSL_TYPE_IMAGE:
- case GLSL_TYPE_VOID:
- case GLSL_TYPE_ERROR:
- case GLSL_TYPE_INTERFACE:
- unreachable("not reached");
- }
-
- return 0;
+ bld.F16TO32(dst, tmp_src);
}
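
The matching CPU-side reference for the unpack path (f16_to_f32 is likewise
a hypothetical helper):

   static void
   unpack_half_2x16_ref(uint32_t v, float *x, float *y)
   {
      *x = f16_to_f32(v & 0xffffu);   /* the AND -> tmp.x -> dst.x */
      *y = f16_to_f32(v >> 16);       /* the SHR -> tmp.y -> dst.y */
   }
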
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
@@ -615,7 +249,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
init();
this->file = GRF;
- this->reg = v->alloc.allocate(type_size(type));
+ this->reg = v->alloc.allocate(v->type_size(type));
if (type->is_array() || type->is_record()) {
this->swizzle = BRW_SWIZZLE_NOOP;
@@ -633,7 +267,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
init();
this->file = GRF;
- this->reg = v->alloc.allocate(type_size(type) * size);
+ this->reg = v->alloc.allocate(v->type_size(type) * size);
this->swizzle = BRW_SWIZZLE_NOOP;
@@ -645,7 +279,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
init();
this->file = GRF;
- this->reg = v->alloc.allocate(type_size(type));
+ this->reg = v->alloc.allocate(v->type_size(type));
if (type->is_array() || type->is_record()) {
this->writemask = WRITEMASK_XYZW;
@@ -656,55 +290,6 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
this->type = brw_type_for_base_type(type);
}
-/* Our support for uniforms is piggy-backed on the struct
- * gl_fragment_program, because that's where the values actually
- * get stored, rather than in some global gl_shader_program uniform
- * store.
- */
-void
-vec4_visitor::setup_uniform_values(ir_variable *ir)
-{
- int namelen = strlen(ir->name);
-
- /* The data for our (non-builtin) uniforms is stored in a series of
- * gl_uniform_driver_storage structs for each subcomponent that
- * glGetUniformLocation() could name. We know it's been set up in the same
- * order we'd walk the type, so walk the list of storage and find anything
- * with our name, or the prefix of a component that starts with our name.
- */
- for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
- struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
- if (strncmp(ir->name, storage->name, namelen) != 0 ||
- (storage->name[namelen] != 0 &&
- storage->name[namelen] != '.' &&
- storage->name[namelen] != '[')) {
- continue;
- }
-
- gl_constant_value *components = storage->storage;
- unsigned vector_count = (MAX2(storage->array_elements, 1) *
- storage->type->matrix_columns);
-
- for (unsigned s = 0; s < vector_count; s++) {
- assert(uniforms < uniform_array_size);
- uniform_vector_size[uniforms] = storage->type->vector_elements;
-
- int i;
- for (i = 0; i < uniform_vector_size[uniforms]; i++) {
- stage_prog_data->param[uniforms * 4 + i] = components;
- components++;
- }
- for (; i < 4; i++) {
- static gl_constant_value zero = { 0.0 };
- stage_prog_data->param[uniforms * 4 + i] = &zero;
- }
-
- uniforms++;
- }
- }
-}
-
void
vec4_visitor::setup_uniform_clipplane_values()
{
@@ -723,270 +308,6 @@ vec4_visitor::setup_uniform_clipplane_values()
}
}
-/* Our support for builtin uniforms is even scarier than non-builtin.
- * It sits on top of the PROG_STATE_VAR parameters that are
- * automatically updated from GL context state.
- */
-void
-vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
-{
- const ir_state_slot *const slots = ir->get_state_slots();
- assert(slots != NULL);
-
- for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
- /* This state reference has already been setup by ir_to_mesa,
- * but we'll get the same index back here. We can reference
- * ParameterValues directly, since unlike brw_fs.cpp, we never
- * add new state references during compile.
- */
- int index = _mesa_add_state_reference(this->prog->Parameters,
- (gl_state_index *)slots[i].tokens);
- gl_constant_value *values =
- &this->prog->Parameters->ParameterValues[index][0];
-
- assert(this->uniforms < uniform_array_size);
- this->uniform_vector_size[this->uniforms] = 0;
- /* Add each of the unique swizzled channels of the element.
- * This will end up matching the size of the glsl_type of this field.
- */
- int last_swiz = -1;
- for (unsigned int j = 0; j < 4; j++) {
- int swiz = GET_SWZ(slots[i].swizzle, j);
- last_swiz = swiz;
-
- stage_prog_data->param[this->uniforms * 4 + j] = &values[swiz];
- assert(this->uniforms < uniform_array_size);
- if (swiz <= last_swiz)
- this->uniform_vector_size[this->uniforms]++;
- }
- this->uniforms++;
- }
-}
-
-dst_reg *
-vec4_visitor::variable_storage(ir_variable *var)
-{
- return (dst_reg *)hash_table_find(this->variable_ht, var);
-}
-
-void
-vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,
- enum brw_predicate *predicate)
-{
- ir_expression *expr = ir->as_expression();
-
- *predicate = BRW_PREDICATE_NORMAL;
-
- if (expr && expr->operation != ir_binop_ubo_load) {
- src_reg op[3];
- vec4_instruction *inst;
-
- assert(expr->get_num_operands() <= 3);
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- expr->operands[i]->accept(this);
- op[i] = this->result;
-
- resolve_ud_negate(&op[i]);
- }
-
- switch (expr->operation) {
- case ir_unop_logic_not:
- inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- break;
-
- case ir_binop_logic_xor:
- inst = emit(XOR(dst_null_d(), op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_binop_logic_or:
- inst = emit(OR(dst_null_d(), op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_binop_logic_and:
- inst = emit(AND(dst_null_d(), op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_unop_f2b:
- if (brw->gen >= 6) {
- emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
- } else {
- inst = emit(MOV(dst_null_f(), op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_unop_i2b:
- if (brw->gen >= 6) {
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- } else {
- inst = emit(MOV(dst_null_d(), op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_binop_all_equal:
- inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
- *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
- break;
-
- case ir_binop_any_nequal:
- inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
- *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
- break;
-
- case ir_unop_any:
- inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
- break;
-
- case ir_binop_greater:
- case ir_binop_gequal:
- case ir_binop_less:
- case ir_binop_lequal:
- case ir_binop_equal:
- case ir_binop_nequal:
- emit(CMP(dst_null_d(), op[0], op[1],
- brw_conditional_for_comparison(expr->operation)));
- break;
-
- case ir_triop_csel: {
- /* Expand the boolean condition into the flag register. */
- inst = emit(MOV(dst_null_d(), op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
- /* Select which boolean to return. */
- dst_reg temp(this, expr->operands[1]->type);
- inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- /* Expand the result to a condition code. */
- inst = emit(MOV(dst_null_d(), src_reg(temp)));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
- }
-
- default:
- unreachable("not reached");
- }
- return;
- }
-
- ir->accept(this);
-
- resolve_ud_negate(&this->result);
-
- if (brw->gen >= 6) {
- vec4_instruction *inst = emit(AND(dst_null_d(),
- this->result, src_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- } else {
- vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
-}
-
-/**
- * Emit a gen6 IF statement with the comparison folded into the IF
- * instruction.
- */
-void
-vec4_visitor::emit_if_gen6(ir_if *ir)
-{
- ir_expression *expr = ir->condition->as_expression();
-
- if (expr && expr->operation != ir_binop_ubo_load) {
- src_reg op[3];
- dst_reg temp;
-
- assert(expr->get_num_operands() <= 3);
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- expr->operands[i]->accept(this);
- op[i] = this->result;
- }
-
- switch (expr->operation) {
- case ir_unop_logic_not:
- emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
- return;
-
- case ir_binop_logic_xor:
- emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_logic_or:
- temp = dst_reg(this, glsl_type::bool_type);
- emit(OR(temp, op[0], op[1]));
- emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_logic_and:
- temp = dst_reg(this, glsl_type::bool_type);
- emit(AND(temp, op[0], op[1]));
- emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_unop_f2b:
- emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_unop_i2b:
- emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_greater:
- case ir_binop_gequal:
- case ir_binop_less:
- case ir_binop_lequal:
- case ir_binop_equal:
- case ir_binop_nequal:
- emit(IF(op[0], op[1],
- brw_conditional_for_comparison(expr->operation)));
- return;
-
- case ir_binop_all_equal:
- emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
- emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
- return;
-
- case ir_binop_any_nequal:
- emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
- emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
- return;
-
- case ir_unop_any:
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
- return;
-
- case ir_triop_csel: {
- /* Expand the boolean condition into the flag register. */
- vec4_instruction *inst = emit(MOV(dst_null_d(), op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
- /* Select which boolean to return. */
- dst_reg temp(this, expr->operands[1]->type);
- inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
- return;
- }
-
- default:
- unreachable("not reached");
- }
- return;
- }
-
- ir->condition->accept(this);
-
- emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
-}
-
void
vec4_visitor::visit(ir_variable *ir)
{
@@ -995,1803 +316,31 @@ vec4_visitor::visit(ir_variable *ir)
if (variable_storage(ir))
return;
- switch (ir->data.mode) {
- case ir_var_shader_in:
- reg = new(mem_ctx) dst_reg(ATTR, ir->data.location);
- break;
+ if (ir->data.mode == ir_var_shader_in) {
+ reg = new(mem_ctx) dst_reg(resize(dst_reg(ATTR, ir->data.location),
+ type_vector_size(ir->type)));
- case ir_var_shader_out:
- reg = new(mem_ctx) dst_reg(this, ir->type);
+ } else if (ir->data.mode == ir_var_shader_out) {
+ reg = new(mem_ctx) dst_reg(temporary_reg(ir->type));
for (int i = 0; i < type_size(ir->type); i++) {
- output_reg[ir->data.location + i] = *reg;
- output_reg[ir->data.location + i].reg_offset = i;
- output_reg[ir->data.location + i].type =
+ output_reg[ir->data.location + i] = *reg;
+ output_reg[ir->data.location + i].reg_offset = i;
+ output_reg[ir->data.location + i].type =
brw_type_for_base_type(ir->type->get_scalar_type());
- output_reg_annotation[ir->data.location + i] = ir->name;
- }
- break;
-
- case ir_var_auto:
- case ir_var_temporary:
- reg = new(mem_ctx) dst_reg(this, ir->type);
- break;
-
- case ir_var_uniform:
- reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
-
- /* Thanks to the lower_ubo_reference pass, we will see only
- * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
- * variables, so no need for them to be in variable_ht.
- *
- * Some uniforms, such as samplers and atomic counters, have no actual
- * storage, so we should ignore them.
- */
- if (ir->is_in_uniform_block() || type_size(ir->type) == 0)
- return;
-
- /* Track how big the whole uniform variable is, in case we need to put a
- * copy of its data into pull constants for array access.
- */
- assert(this->uniforms < uniform_array_size);
- this->uniform_size[this->uniforms] = type_size(ir->type);
-
- if (!strncmp(ir->name, "gl_", 3)) {
- setup_builtin_uniform_values(ir);
- } else {
- setup_uniform_values(ir);
+ output_reg_annotation[ir->data.location + i] = ir->name;
}
- break;
- case ir_var_system_value:
+ } else if (ir->data.mode == ir_var_system_value) {
reg = make_reg_for_system_value(ir);
- break;
-
- default:
- unreachable("not reached");
- }
-
- reg->type = brw_type_for_base_type(ir->type);
- hash_table_insert(this->variable_ht, reg, ir);
-}
-
-void
-vec4_visitor::visit(ir_loop *ir)
-{
- /* We don't want debugging output to print the whole body of the
- * loop as the annotation.
- */
- this->base_ir = NULL;
-
- emit(BRW_OPCODE_DO);
-
- visit_instructions(&ir->body_instructions);
-
- emit(BRW_OPCODE_WHILE);
-}
-
-void
-vec4_visitor::visit(ir_loop_jump *ir)
-{
- switch (ir->mode) {
- case ir_loop_jump::jump_break:
- emit(BRW_OPCODE_BREAK);
- break;
- case ir_loop_jump::jump_continue:
- emit(BRW_OPCODE_CONTINUE);
- break;
- }
-}
-
-
-void
-vec4_visitor::visit(ir_function_signature *)
-{
- unreachable("not reached");
-}
-
-void
-vec4_visitor::visit(ir_function *ir)
-{
- /* Ignore function bodies other than main() -- we shouldn't see calls to
- * them since they should all be inlined.
- */
- if (strcmp(ir->name, "main") == 0) {
- const ir_function_signature *sig;
- exec_list empty;
-
- sig = ir->matching_signature(NULL, &empty, false);
-
- assert(sig);
-
- visit_instructions(&sig->body);
- }
-}
-
-bool
-vec4_visitor::try_emit_mad(ir_expression *ir)
-{
- /* 3-src instructions were introduced in gen6. */
- if (brw->gen < 6)
- return false;
-
- /* MAD can only handle floating-point data. */
- if (ir->type->base_type != GLSL_TYPE_FLOAT)
- return false;
-
- ir_rvalue *nonmul = ir->operands[1];
- ir_expression *mul = ir->operands[0]->as_expression();
-
- if (!mul || mul->operation != ir_binop_mul) {
- nonmul = ir->operands[0];
- mul = ir->operands[1]->as_expression();
-
- if (!mul || mul->operation != ir_binop_mul)
- return false;
- }
-
- nonmul->accept(this);
- src_reg src0 = fix_3src_operand(this->result);
-
- mul->operands[0]->accept(this);
- src_reg src1 = fix_3src_operand(this->result);
-
- mul->operands[1]->accept(this);
- src_reg src2 = fix_3src_operand(this->result);
-
- this->result = src_reg(this, ir->type);
- emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
-
- return true;
-}
-
-bool
-vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir)
-{
- /* This optimization relies on CMP setting the destination to 0 when
- * false. Early hardware only sets the least significant bit, and
- * leaves the other bits undefined. So we can't use it.
- */
- if (brw->gen < 6)
- return false;
-
- ir_expression *const cmp = ir->operands[0]->as_expression();
-
- if (cmp == NULL)
- return false;
-
- switch (cmp->operation) {
- case ir_binop_less:
- case ir_binop_greater:
- case ir_binop_lequal:
- case ir_binop_gequal:
- case ir_binop_equal:
- case ir_binop_nequal:
- break;
-
- default:
- return false;
- }
-
- cmp->operands[0]->accept(this);
- const src_reg cmp_src0 = this->result;
-
- cmp->operands[1]->accept(this);
- const src_reg cmp_src1 = this->result;
-
- this->result = src_reg(this, ir->type);
-
- emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1,
- brw_conditional_for_comparison(cmp->operation)));
-
- /* If the comparison is false, this->result will just happen to be zero.
- */
- vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result),
- this->result, src_reg(1.0f));
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->predicate_inverse = true;
-
- return true;
-}
-
-void
-vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
- src_reg src0, src_reg src1)
-{
- vec4_instruction *inst;
-
- if (brw->gen >= 6) {
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->conditional_mod = conditionalmod;
- } else {
- emit(CMP(dst, src0, src1, conditionalmod));
-
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-}
-
-void
-vec4_visitor::emit_lrp(const dst_reg &dst,
- const src_reg &x, const src_reg &y, const src_reg &a)
-{
- if (brw->gen >= 6) {
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- emit(LRP(dst,
- fix_3src_operand(a), fix_3src_operand(y), fix_3src_operand(x)));
- } else {
- /* Earlier generations don't support three source operations, so we
- * need to emit x*(1-a) + y*a.
- */
- dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
- dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
- dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
- y_times_a.writemask = dst.writemask;
- one_minus_a.writemask = dst.writemask;
- x_times_one_minus_a.writemask = dst.writemask;
-
- emit(MUL(y_times_a, y, a));
- emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
- emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
- emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
- }
-}
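
The pre-gen6 expansion above implements the usual lerp identity (reference
semantics, illustration only):

   static float
   lrp_ref(float x, float y, float a)
   {
      return x * (1.0f - a) + y * a;   /* == mix(x, y, a); gen6+ uses LRP */
   }
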
-
-void
-vec4_visitor::visit(ir_expression *ir)
-{
- unsigned int operand;
- src_reg op[Elements(ir->operands)];
- src_reg result_src;
- dst_reg result_dst;
- vec4_instruction *inst;
-
- if (ir->operation == ir_binop_add) {
- if (try_emit_mad(ir))
- return;
- }
-
- if (ir->operation == ir_unop_b2f) {
- if (try_emit_b2f_of_compare(ir))
- return;
- }
-
- for (operand = 0; operand < ir->get_num_operands(); operand++) {
- this->result.file = BAD_FILE;
- ir->operands[operand]->accept(this);
- if (this->result.file == BAD_FILE) {
- fprintf(stderr, "Failed to get tree for expression operand:\n");
- ir->operands[operand]->fprint(stderr);
- exit(1);
- }
- op[operand] = this->result;
-
- /* Matrix expression operands should have been broken down to vector
- * operations already.
- */
- assert(!ir->operands[operand]->type->is_matrix());
- }
-
- int vector_elements = ir->operands[0]->type->vector_elements;
- if (ir->operands[1]) {
- vector_elements = MAX2(vector_elements,
- ir->operands[1]->type->vector_elements);
- }
-
- this->result.file = BAD_FILE;
-
- /* Storage for our result. Ideally for an assignment we'd be using
- * the actual storage for the result here, instead.
- */
- result_src = src_reg(this, ir->type);
- /* convenience for the emit functions below. */
- result_dst = dst_reg(result_src);
- /* If nothing special happens, this is the result. */
- this->result = result_src;
- /* Limit writes to the channels that will be used by result_src later.
- * This does limit this temp's use as a temporary for multi-instruction
- * sequences.
- */
- result_dst.writemask = (1 << ir->type->vector_elements) - 1;
-
- switch (ir->operation) {
- case ir_unop_logic_not:
- if (ctx->Const.UniformBooleanTrue != 1) {
- emit(NOT(result_dst, op[0]));
- } else {
- emit(XOR(result_dst, op[0], src_reg(1)));
- }
- break;
- case ir_unop_neg:
- op[0].negate = !op[0].negate;
- emit(MOV(result_dst, op[0]));
- break;
- case ir_unop_abs:
- op[0].abs = true;
- op[0].negate = false;
- emit(MOV(result_dst, op[0]));
- break;
-
- case ir_unop_sign:
- if (ir->type->is_float()) {
- /* AND(val, 0x80000000) gives the sign bit.
- *
- * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
- * zero.
- */
- emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
-
- op[0].type = BRW_REGISTER_TYPE_UD;
- result_dst.type = BRW_REGISTER_TYPE_UD;
- emit(AND(result_dst, op[0], src_reg(0x80000000u)));
-
- inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u)));
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- this->result.type = BRW_REGISTER_TYPE_F;
- } else {
- /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
- * -> non-negative val generates 0x00000000.
- * Predicated OR sets 1 if val is positive.
- */
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
-
- emit(ASR(result_dst, op[0], src_reg(31)));
-
- inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
- break;
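
A scalar model of the sign() bit tricks above (a sketch; float_bits and
bits_to_float are hypothetical bitcast helpers):

   uint32_t bits = float_bits(v);
   uint32_t sign = bits & 0x80000000u;       /* the AND: isolate the sign  */
   float result  = (v != 0.0f)               /* the CMP sets the predicate */
                 ? bits_to_float(sign | 0x3f800000u)   /* OR in 1.0: +/-1  */
                 : bits_to_float(sign);      /* zero input stays +/-0.0    */
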
-
- case ir_unop_rcp:
- emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
- break;
-
- case ir_unop_exp2:
- emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
- break;
- case ir_unop_log2:
- emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
- break;
- case ir_unop_exp:
- case ir_unop_log:
- unreachable("not reached: should be handled by ir_explog_to_explog2");
- case ir_unop_sin:
- case ir_unop_sin_reduced:
- emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
- break;
- case ir_unop_cos:
- case ir_unop_cos_reduced:
- emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
- break;
-
- case ir_unop_dFdx:
- case ir_unop_dFdx_coarse:
- case ir_unop_dFdx_fine:
- case ir_unop_dFdy:
- case ir_unop_dFdy_coarse:
- case ir_unop_dFdy_fine:
- unreachable("derivatives not valid in vertex shader");
-
- case ir_unop_bitfield_reverse:
- emit(BFREV(result_dst, op[0]));
- break;
- case ir_unop_bit_count:
- emit(CBIT(result_dst, op[0]));
- break;
- case ir_unop_find_msb: {
- src_reg temp = src_reg(this, glsl_type::uint_type);
-
- inst = emit(FBH(dst_reg(temp), op[0]));
- inst->dst.writemask = WRITEMASK_XYZW;
-
- /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
- * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
- * subtract the result from 31 to convert the MSB count into an LSB count.
- */
-
- /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
- temp.swizzle = BRW_SWIZZLE_NOOP;
- emit(MOV(result_dst, temp));
-
- src_reg src_tmp = src_reg(result_dst);
- emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
-
- src_tmp.negate = true;
- inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- break;
- }
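
A worked example of the FBH fix-up above (illustrative values):

   /* FBH counts from the MSB: for 0x20000000 (bit 29 set) it returns 2. */
   int fbh = 2;
   int msb = 31 - fbh;   /* == 29, the LSB-based index findMSB() expects */
   /* The CMP/predicated ADD leave FBH's 0xFFFFFFFF "no bit set" result
    * untouched, which is already findMSB()'s -1. */
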
- case ir_unop_find_lsb:
- emit(FBL(result_dst, op[0]));
- break;
- case ir_unop_saturate:
- inst = emit(MOV(result_dst, op[0]));
- inst->saturate = true;
- break;
-
- case ir_unop_noise:
- unreachable("not reached: should be handled by lower_noise");
-
- case ir_binop_add:
- emit(ADD(result_dst, op[0], op[1]));
- break;
- case ir_binop_sub:
- unreachable("not reached: should be handled by ir_sub_to_add_neg");
-
- case ir_binop_mul:
- if (brw->gen < 8 && ir->type->is_integer()) {
- /* For integer multiplication, the MUL uses the low 16 bits of one of
- * the operands (src0 through SNB, src1 on IVB and later). The MACH
- * accumulates the contribution of the upper 16 bits of that
- * operand. If we can determine that one of the args is in the low
- * 16 bits, though, we can just emit a single MUL.
- */
- if (ir->operands[0]->is_uint16_constant()) {
- if (brw->gen < 7)
- emit(MUL(result_dst, op[0], op[1]));
- else
- emit(MUL(result_dst, op[1], op[0]));
- } else if (ir->operands[1]->is_uint16_constant()) {
- if (brw->gen < 7)
- emit(MUL(result_dst, op[1], op[0]));
- else
- emit(MUL(result_dst, op[0], op[1]));
- } else {
- struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
-
- emit(MUL(acc, op[0], op[1]));
- emit(MACH(dst_null_d(), op[0], op[1]));
- emit(MOV(result_dst, src_reg(acc)));
- }
- } else {
- emit(MUL(result_dst, op[0], op[1]));
- }
- break;
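
A simplified model of why the MACH is needed (illustration only, not the
exact hardware definition):

   /* The integer MUL consumes only the low 16 bits of one operand; MACH
    * folds in the missing high-half contribution: */
   uint32_t low_part  = (a & 0xffffu) * b;
   uint32_t high_part = ((a >> 16) * b) << 16;
   uint32_t product   = low_part + high_part;   /* == a * b (mod 2^32) */
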
- case ir_binop_imul_high: {
- struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
-
- emit(MUL(acc, op[0], op[1]));
- emit(MACH(result_dst, op[0], op[1]));
- break;
- }
- case ir_binop_div:
- /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
- assert(ir->type->is_integer());
- emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
- break;
- case ir_binop_carry: {
- struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
-
- emit(ADDC(dst_null_ud(), op[0], op[1]));
- emit(MOV(result_dst, src_reg(acc)));
- break;
- }
- case ir_binop_borrow: {
- struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
-
- emit(SUBB(dst_null_ud(), op[0], op[1]));
- emit(MOV(result_dst, src_reg(acc)));
- break;
- }
- case ir_binop_mod:
- /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
- assert(ir->type->is_integer());
- emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
- break;
-
- case ir_binop_less:
- case ir_binop_greater:
- case ir_binop_lequal:
- case ir_binop_gequal:
- case ir_binop_equal:
- case ir_binop_nequal: {
- emit(CMP(result_dst, op[0], op[1],
- brw_conditional_for_comparison(ir->operation)));
- if (ctx->Const.UniformBooleanTrue == 1) {
- emit(AND(result_dst, result_src, src_reg(1)));
- }
- break;
- }
-
- case ir_binop_all_equal:
- /* "==" operator producing a scalar boolean. */
- if (ir->operands[0]->type->is_vector() ||
- ir->operands[1]->type->is_vector()) {
- emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
- emit(MOV(result_dst, src_reg(0)));
- inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue)));
- inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
- } else {
- emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
- if (ctx->Const.UniformBooleanTrue == 1) {
- emit(AND(result_dst, result_src, src_reg(1)));
- }
- }
- break;
- case ir_binop_any_nequal:
- /* "!=" operator producing a scalar boolean. */
- if (ir->operands[0]->type->is_vector() ||
- ir->operands[1]->type->is_vector()) {
- emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
-
- emit(MOV(result_dst, src_reg(0)));
- inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue)));
- inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
- } else {
- emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
- if (ctx->Const.UniformBooleanTrue == 1) {
- emit(AND(result_dst, result_src, src_reg(1)));
- }
- }
- break;
-
- case ir_unop_any:
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- emit(MOV(result_dst, src_reg(0)));
-
- inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue)));
- inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
- break;
-
- case ir_binop_logic_xor:
- emit(XOR(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_logic_or:
- emit(OR(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_logic_and:
- emit(AND(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_dot:
- assert(ir->operands[0]->type->is_vector());
- assert(ir->operands[0]->type == ir->operands[1]->type);
- emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
- break;
-
- case ir_unop_sqrt:
- emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
- break;
- case ir_unop_rsq:
- emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
- break;
-
- case ir_unop_bitcast_i2f:
- case ir_unop_bitcast_u2f:
- this->result = op[0];
- this->result.type = BRW_REGISTER_TYPE_F;
- break;
-
- case ir_unop_bitcast_f2i:
- this->result = op[0];
- this->result.type = BRW_REGISTER_TYPE_D;
- break;
-
- case ir_unop_bitcast_f2u:
- this->result = op[0];
- this->result.type = BRW_REGISTER_TYPE_UD;
- break;
-
- case ir_unop_i2f:
- case ir_unop_i2u:
- case ir_unop_u2i:
- case ir_unop_u2f:
- case ir_unop_f2i:
- case ir_unop_f2u:
- emit(MOV(result_dst, op[0]));
- break;
- case ir_unop_b2i:
- if (ctx->Const.UniformBooleanTrue != 1) {
- emit(AND(result_dst, op[0], src_reg(1)));
- } else {
- emit(MOV(result_dst, op[0]));
- }
- break;
- case ir_unop_b2f:
- if (ctx->Const.UniformBooleanTrue != 1) {
- op[0].type = BRW_REGISTER_TYPE_UD;
- result_dst.type = BRW_REGISTER_TYPE_UD;
- emit(AND(result_dst, op[0], src_reg(0x3f800000u)));
- result_dst.type = BRW_REGISTER_TYPE_F;
- } else {
- emit(MOV(result_dst, op[0]));
- }
- break;
- case ir_unop_f2b:
- case ir_unop_i2b:
- emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
- if (ctx->Const.UniformBooleanTrue == 1) {
- emit(AND(result_dst, result_src, src_reg(1)));
- }
- break;
-
- case ir_unop_trunc:
- emit(RNDZ(result_dst, op[0]));
- break;
- case ir_unop_ceil:
- op[0].negate = !op[0].negate;
- inst = emit(RNDD(result_dst, op[0]));
- this->result.negate = true;
- break;
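
The negate dance above is the standard identity (reference semantics;
needs <cmath>):

   static float
   ceil_ref(float x)
   {
      return -std::floor(-x);   /* RNDD rounds down; negate in, negate out */
   }
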
- case ir_unop_floor:
- inst = emit(RNDD(result_dst, op[0]));
- break;
- case ir_unop_fract:
- inst = emit(FRC(result_dst, op[0]));
- break;
- case ir_unop_round_even:
- emit(RNDE(result_dst, op[0]));
- break;
-
- case ir_binop_min:
- emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
- break;
- case ir_binop_max:
- emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
- break;
-
- case ir_binop_pow:
- emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
- break;
-
- case ir_unop_bit_not:
- inst = emit(NOT(result_dst, op[0]));
- break;
- case ir_binop_bit_and:
- inst = emit(AND(result_dst, op[0], op[1]));
- break;
- case ir_binop_bit_xor:
- inst = emit(XOR(result_dst, op[0], op[1]));
- break;
- case ir_binop_bit_or:
- inst = emit(OR(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_lshift:
- inst = emit(SHL(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_rshift:
- if (ir->type->base_type == GLSL_TYPE_INT)
- inst = emit(ASR(result_dst, op[0], op[1]));
- else
- inst = emit(SHR(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_bfm:
- emit(BFI1(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_ubo_load: {
- ir_constant *const_uniform_block = ir->operands[0]->as_constant();
- ir_constant *const_offset_ir = ir->operands[1]->as_constant();
- unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
- src_reg offset;
-
- /* Now, load the vector from that offset. */
- assert(ir->type->is_vector() || ir->type->is_scalar());
-
- src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
- packed_consts.type = result.type;
- src_reg surf_index;
-
- if (const_uniform_block) {
- /* The block index is a constant, so just emit the binding table entry
- * as an immediate.
- */
- surf_index = src_reg(prog_data->base.binding_table.ubo_start +
- const_uniform_block->value.u[0]);
- } else {
- /* The block index is not a constant. Evaluate the index expression
- * per-channel and add the base UBO index; the generator will select
- * a value from any live channel.
- */
- surf_index = src_reg(this, glsl_type::uint_type);
- emit(ADD(dst_reg(surf_index), op[0],
- src_reg(prog_data->base.binding_table.ubo_start)));
-
- /* Assume this may touch any UBO. It would be nice to provide
- * a tighter bound, but the array information is already lowered away.
- */
- brw_mark_surface_used(&prog_data->base,
- prog_data->base.binding_table.ubo_start +
- shader_prog->NumUniformBlocks - 1);
- }
-
- if (const_offset_ir) {
- if (brw->gen >= 8) {
- /* Store the offset in a GRF so we can send-from-GRF. */
- offset = src_reg(this, glsl_type::int_type);
- emit(MOV(dst_reg(offset), src_reg(const_offset / 16)));
- } else {
- /* Immediates are fine on older generations since they'll be moved
- * to a (potentially fake) MRF at the generator level.
- */
- offset = src_reg(const_offset / 16);
- }
- } else {
- offset = src_reg(this, glsl_type::uint_type);
- emit(SHR(dst_reg(offset), op[1], src_reg(4)));
- }
-
- if (brw->gen >= 7) {
- dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
- grf_offset.type = offset.type;
-
- emit(MOV(grf_offset, offset));
-
- emit(new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
- dst_reg(packed_consts),
- surf_index,
- src_reg(grf_offset)));
- } else {
- vec4_instruction *pull =
- emit(new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
- dst_reg(packed_consts),
- surf_index,
- offset));
- pull->base_mrf = 14;
- pull->mlen = 1;
- }
-
- packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
- packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
- const_offset % 16 / 4,
- const_offset % 16 / 4,
- const_offset % 16 / 4);
-
- /* UBO bools are any nonzero int. We need to convert them to use the
- * value of true stored in ctx->Const.UniformBooleanTrue.
- */
- if (ir->type->base_type == GLSL_TYPE_BOOL) {
- emit(CMP(result_dst, packed_consts, src_reg(0u),
- BRW_CONDITIONAL_NZ));
- if (ctx->Const.UniformBooleanTrue == 1) {
- emit(AND(result_dst, result, src_reg(1)));
- }
- } else {
- emit(MOV(result_dst, packed_consts));
- }
- break;
- }
-
- case ir_binop_vector_extract:
- unreachable("should have been lowered by vec_index_to_cond_assign");
-
- case ir_triop_fma:
- op[0] = fix_3src_operand(op[0]);
- op[1] = fix_3src_operand(op[1]);
- op[2] = fix_3src_operand(op[2]);
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- emit(MAD(result_dst, op[2], op[1], op[0]));
- break;
-
- case ir_triop_lrp:
- emit_lrp(result_dst, op[0], op[1], op[2]);
- break;
-
- case ir_triop_csel:
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- inst = emit(BRW_OPCODE_SEL, result_dst, op[1], op[2]);
- inst->predicate = BRW_PREDICATE_NORMAL;
- break;
-
- case ir_triop_bfi:
- op[0] = fix_3src_operand(op[0]);
- op[1] = fix_3src_operand(op[1]);
- op[2] = fix_3src_operand(op[2]);
- emit(BFI2(result_dst, op[0], op[1], op[2]));
- break;
-
- case ir_triop_bitfield_extract:
- op[0] = fix_3src_operand(op[0]);
- op[1] = fix_3src_operand(op[1]);
- op[2] = fix_3src_operand(op[2]);
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- emit(BFE(result_dst, op[2], op[1], op[0]));
- break;
-
- case ir_triop_vector_insert:
- unreachable("should have been lowered by lower_vector_insert");
-
- case ir_quadop_bitfield_insert:
- unreachable("not reached: should be handled by "
- "bitfield_insert_to_bfm_bfi\n");
-
- case ir_quadop_vector:
- unreachable("not reached: should be handled by lower_quadop_vector");
-
- case ir_unop_pack_half_2x16:
- emit_pack_half_2x16(result_dst, op[0]);
- break;
- case ir_unop_unpack_half_2x16:
- emit_unpack_half_2x16(result_dst, op[0]);
- break;
- case ir_unop_pack_snorm_2x16:
- case ir_unop_pack_snorm_4x8:
- case ir_unop_pack_unorm_2x16:
- case ir_unop_pack_unorm_4x8:
- case ir_unop_unpack_snorm_2x16:
- case ir_unop_unpack_snorm_4x8:
- case ir_unop_unpack_unorm_2x16:
- case ir_unop_unpack_unorm_4x8:
- unreachable("not reached: should be handled by lower_packing_builtins");
- case ir_unop_unpack_half_2x16_split_x:
- case ir_unop_unpack_half_2x16_split_y:
- case ir_binop_pack_half_2x16_split:
- case ir_unop_interpolate_at_centroid:
- case ir_binop_interpolate_at_sample:
- case ir_binop_interpolate_at_offset:
- unreachable("not reached: should not occur in vertex shader");
- case ir_binop_ldexp:
- unreachable("not reached: should be handled by ldexp_to_arith()");
- }
-}
-
-
-void
-vec4_visitor::visit(ir_swizzle *ir)
-{
- src_reg src;
- int i = 0;
- int swizzle[4];
-
- /* Note that this is only swizzles in expressions, not those on the left
- * hand side of an assignment, which do write masking. See ir_assignment
- * for that.
- */
-
- ir->val->accept(this);
- src = this->result;
- assert(src.file != BAD_FILE);
-
- for (i = 0; i < ir->type->vector_elements; i++) {
- switch (i) {
- case 0:
- swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
- break;
- case 1:
- swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
- break;
- case 2:
- swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
- break;
- case 3:
- swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
- break;
- }
- }
- for (; i < 4; i++) {
- /* Replicate the last channel out. */
- swizzle[i] = swizzle[ir->type->vector_elements - 1];
- }
-
- src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
-
- this->result = src;
-}
-
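The loop above composes the expression's swizzle with whatever swizzle the
source already carries, one channel at a time. A minimal standalone sketch of
that composition, assuming the driver's two-bits-per-channel packing (the
swizzle4/get_swz helpers below are local stand-ins for BRW_SWIZZLE4 and
BRW_GET_SWZ):

    #include <cstdint>
    #include <cstdio>

    /* Channel i's source selector lives in bits [2i+1:2i]. */
    static uint32_t swizzle4(int a, int b, int c, int d)
    { return a | (b << 2) | (c << 4) | (d << 6); }

    static int get_swz(uint32_t swz, int i)
    { return (swz >> (2 * i)) & 3; }

    int main()
    {
       /* Source is already swizzled .yzwx; the expression asks for .zy. */
       uint32_t src = swizzle4(1, 2, 3, 0);
       int out[4];
       out[0] = get_swz(src, 2);   /* mask.x = z -> selects w */
       out[1] = get_swz(src, 1);   /* mask.y = y -> selects z */
       out[2] = out[3] = out[1];   /* replicate the last channel */
       printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* 3 2 2 2 */
    }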
-void
-vec4_visitor::visit(ir_dereference_variable *ir)
-{
- const struct glsl_type *type = ir->type;
- dst_reg *reg = variable_storage(ir->var);
-
- if (!reg) {
- fail("Failed to find variable storage for %s\n", ir->var->name);
- this->result = src_reg(brw_null_reg());
- return;
- }
-
- this->result = src_reg(*reg);
-
- /* System values get their swizzle from the dst_reg writemask */
- if (ir->var->data.mode == ir_var_system_value)
- return;
-
- if (type->is_scalar() || type->is_vector() || type->is_matrix())
- this->result.swizzle = swizzle_for_size(type->vector_elements);
-}
-
-
-int
-vec4_visitor::compute_array_stride(ir_dereference_array *ir)
-{
- /* Under normal circumstances array elements are stored consecutively, so
- * the stride is equal to the size of the array element.
- */
- return type_size(ir->type);
-}
-
-
-void
-vec4_visitor::visit(ir_dereference_array *ir)
-{
- ir_constant *constant_index;
- src_reg src;
- int array_stride = compute_array_stride(ir);
-
- constant_index = ir->array_index->constant_expression_value();
-
- ir->array->accept(this);
- src = this->result;
-
- if (constant_index) {
- src.reg_offset += constant_index->value.i[0] * array_stride;
- } else {
- /* Variable index array dereference. It takes the vec4 base of the
- * array plus an index that offsets the Mesa register index.
- */
- ir->array_index->accept(this);
-
- src_reg index_reg;
-
- if (array_stride == 1) {
- index_reg = this->result;
- } else {
- index_reg = src_reg(this, glsl_type::int_type);
-
- emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
- }
-
- if (src.reladdr) {
- src_reg temp = src_reg(this, glsl_type::int_type);
-
- emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
-
- index_reg = temp;
- }
-
- src.reladdr = ralloc(mem_ctx, src_reg);
- memcpy(src.reladdr, &index_reg, sizeof(index_reg));
- }
-
- /* If the type is smaller than a vec4, replicate the last channel out. */
- if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
- src.swizzle = swizzle_for_size(ir->type->vector_elements);
- else
- src.swizzle = BRW_SWIZZLE_NOOP;
- src.type = brw_type_for_base_type(ir->type);
-
- this->result = src;
-}
-
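For the variable-index path above, each nesting level contributes one scaled
index to the reladdr chain. Illustratively, for a[i][j] where each a[i] spans
four vec4 registers (strides assumed for the example), the emitted sequence
boils down to:

    MUL tmp0, i, 4      // a[i]: index times its element size in vec4s
    ADD tmp1, tmp0, j   // a[i][j]: element stride is 1, so no MUL for j
    // the access then reads the array's base GRF with reladdr = tmp1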
-void
-vec4_visitor::visit(ir_dereference_record *ir)
-{
- unsigned int i;
- const glsl_type *struct_type = ir->record->type;
- int offset = 0;
-
- ir->record->accept(this);
-
- for (i = 0; i < struct_type->length; i++) {
- if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
- break;
- offset += type_size(struct_type->fields.structure[i].type);
- }
-
- /* If the type is smaller than a vec4, replicate the last channel out. */
- if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
- this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
- else
- this->result.swizzle = BRW_SWIZZLE_NOOP;
- this->result.type = brw_type_for_base_type(ir->type);
-
- this->result.reg_offset += offset;
-}
-
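The field offset above is just the running sum of type_size over the
preceding fields, in vec4 units; e.g. for struct { vec4 a; mat3 b; float c; },
dereferencing c adds a reg_offset of 1 + 3 = 4 (one slot for the vec4, one per
mat3 column).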
-/**
- * We want to be careful in assignment setup to hit the actual storage
- * instead of potentially using a temporary like we might with the
- * ir_dereference handler.
- */
-static dst_reg
-get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
-{
- /* The LHS must be a dereference. If the LHS is a variable indexed array
- * access of a vector, it must be separated into a series of conditional moves
- * before reaching this point (see ir_vec_index_to_cond_assign).
- */
- assert(ir->as_dereference());
- ir_dereference_array *deref_array = ir->as_dereference_array();
- if (deref_array) {
- assert(!deref_array->array->type->is_vector());
- }
-
- /* Use the rvalue deref handler for the most part. We'll ignore
- * swizzles in it and write swizzles using writemask, though.
- */
- ir->accept(v);
- return dst_reg(v->result);
-}
-
-void
-vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
- const struct glsl_type *type,
- enum brw_predicate predicate)
-{
- if (type->base_type == GLSL_TYPE_STRUCT) {
- for (unsigned int i = 0; i < type->length; i++) {
- emit_block_move(dst, src, type->fields.structure[i].type, predicate);
- }
- return;
- }
-
- if (type->is_array()) {
- for (unsigned int i = 0; i < type->length; i++) {
- emit_block_move(dst, src, type->fields.array, predicate);
- }
- return;
- }
-
- if (type->is_matrix()) {
- const struct glsl_type *vec_type;
-
- vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
- type->vector_elements, 1);
-
- for (int i = 0; i < type->matrix_columns; i++) {
- emit_block_move(dst, src, vec_type, predicate);
- }
- return;
- }
-
- assert(type->is_scalar() || type->is_vector());
-
- dst->type = brw_type_for_base_type(type);
- src->type = dst->type;
-
- dst->writemask = (1 << type->vector_elements) - 1;
-
- src->swizzle = swizzle_for_size(type->vector_elements);
-
- vec4_instruction *inst = emit(MOV(*dst, *src));
- inst->predicate = predicate;
-
- dst->reg_offset++;
- src->reg_offset++;
-}
-
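emit_block_move recurses through the aggregate and emits one (possibly
predicated) MOV per vec4-sized slot, bumping reg_offset as it goes. For a
hypothetical struct { vec3 a; float b[2]; } the unrolled result is:

    MOV dst+0.xyz, src+0.xyz   // a: one vec3 slot
    MOV dst+1.x,   src+1.x     // b[0]
    MOV dst+2.x,   src+2.x     // b[1]

so a conditional assignment of the whole struct costs three predicated MOVs.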
-
-/* If the RHS processing resulted in an instruction generating a
- * temporary value, and it would be easy to rewrite the instruction to
- * generate its result right into the LHS instead, do so. This ends
- * up reliably removing instructions that would be tricky to eliminate
- * later without real use-def chain information.
- */
-bool
-vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
- dst_reg dst,
- src_reg src,
- vec4_instruction *pre_rhs_inst,
- vec4_instruction *last_rhs_inst)
-{
- /* This could be supported, but it would take more smarts. */
- if (ir->condition)
- return false;
-
- if (pre_rhs_inst == last_rhs_inst)
- return false; /* No instructions generated to work with. */
-
- /* Make sure the last instruction generated our source reg. */
- if (src.file != GRF ||
- src.file != last_rhs_inst->dst.file ||
- src.reg != last_rhs_inst->dst.reg ||
- src.reg_offset != last_rhs_inst->dst.reg_offset ||
- src.reladdr ||
- src.abs ||
- src.negate ||
- last_rhs_inst->predicate != BRW_PREDICATE_NONE)
- return false;
-
- /* Check that the last instruction fully initialized the channels
- * we want to use, in the order we want to use them. We could
- * potentially reswizzle the operands of many instructions so that
- * we could handle out of order channels, but don't yet.
- */
-
- for (unsigned i = 0; i < 4; i++) {
- if (dst.writemask & (1 << i)) {
- if (!(last_rhs_inst->dst.writemask & (1 << i)))
- return false;
-
- if (BRW_GET_SWZ(src.swizzle, i) != i)
- return false;
- }
- }
-
- /* Success! Rewrite the instruction. */
- last_rhs_inst->dst.file = dst.file;
- last_rhs_inst->dst.reg = dst.reg;
- last_rhs_inst->dst.reg_offset = dst.reg_offset;
- last_rhs_inst->dst.reladdr = dst.reladdr;
- last_rhs_inst->dst.writemask &= dst.writemask;
-
- return true;
-}
-
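Concretely, for v.xy = a + b the RHS first lands in a temporary:

    ADD tmp.xy, a, b
    MOV v.xy, tmp.xyxx   // compacted swizzle from the assignment

and the rewrite retargets the ADD and drops the MOV:

    ADD v.xy, a, b

The final writemask intersection (dst.writemask &= ...) is what keeps the
rewritten instruction from touching channels the assignment never wrote.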
-void
-vec4_visitor::visit(ir_assignment *ir)
-{
- dst_reg dst = get_assignment_lhs(ir->lhs, this);
- enum brw_predicate predicate = BRW_PREDICATE_NONE;
-
- if (!ir->lhs->type->is_scalar() &&
- !ir->lhs->type->is_vector()) {
- ir->rhs->accept(this);
- src_reg src = this->result;
-
- if (ir->condition) {
- emit_bool_to_cond_code(ir->condition, &predicate);
- }
-
- /* emit_block_move doesn't account for swizzles in the source register.
- * This should be ok, since the source register is a structure or an
- * array, and those can't be swizzled. But double-check to be sure.
- */
- assert(src.swizzle ==
- (ir->rhs->type->is_matrix()
- ? swizzle_for_size(ir->rhs->type->vector_elements)
- : BRW_SWIZZLE_NOOP));
-
- emit_block_move(&dst, &src, ir->rhs->type, predicate);
- return;
- }
-
- /* Now we're down to just a scalar/vector with writemasks. */
- int i;
-
- vec4_instruction *pre_rhs_inst, *last_rhs_inst;
- pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
-
- ir->rhs->accept(this);
-
- last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
-
- src_reg src = this->result;
-
- int swizzles[4];
- int first_enabled_chan = 0;
- int src_chan = 0;
-
- assert(ir->lhs->type->is_vector() ||
- ir->lhs->type->is_scalar());
- dst.writemask = ir->write_mask;
-
- for (int i = 0; i < 4; i++) {
- if (dst.writemask & (1 << i)) {
- first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
- break;
- }
- }
-
- /* Swizzle a small RHS vector into the channels being written.
- *
- * GLSL IR treats write_mask as dictating how many channels are
- * present on the RHS, while in our instructions those channels must
- * appear in the slots of the vec4 they're written to.
- */
- for (int i = 0; i < 4; i++) {
- if (dst.writemask & (1 << i))
- swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
- else
- swizzles[i] = first_enabled_chan;
- }
- src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
- swizzles[2], swizzles[3]);
-
- if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
- return;
- }
-
- if (ir->condition) {
- emit_bool_to_cond_code(ir->condition, &predicate);
- }
-
- for (i = 0; i < type_size(ir->lhs->type); i++) {
- vec4_instruction *inst = emit(MOV(dst, src));
- inst->predicate = predicate;
-
- dst.reg_offset++;
- src.reg_offset++;
- }
-}
-
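The compaction above matters for sparse writemasks. For v.xz = u.wy, GLSL IR
hands over a two-channel RHS whose swizzle reads (w, y, y, y); the loop then
spreads those channels into the enabled destination slots, with the disabled
slots just repeating the first enabled channel, giving swizzles[] =
(w, w, y, w). The emitted instruction is

    MOV v.xz, u.wwyw

which lands u.w in v.x and u.y in v.z, exactly as the assignment requires.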
-void
-vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
-{
- if (ir->type->base_type == GLSL_TYPE_STRUCT) {
- foreach_in_list(ir_constant, field_value, &ir->components) {
- emit_constant_values(dst, field_value);
- }
- return;
- }
-
- if (ir->type->is_array()) {
- for (unsigned int i = 0; i < ir->type->length; i++) {
- emit_constant_values(dst, ir->array_elements[i]);
- }
- return;
- }
-
- if (ir->type->is_matrix()) {
- for (int i = 0; i < ir->type->matrix_columns; i++) {
- float *vec = &ir->value.f[i * ir->type->vector_elements];
-
- for (int j = 0; j < ir->type->vector_elements; j++) {
- dst->writemask = 1 << j;
- dst->type = BRW_REGISTER_TYPE_F;
-
- emit(MOV(*dst, src_reg(vec[j])));
- }
- dst->reg_offset++;
- }
- return;
- }
-
- int remaining_writemask = (1 << ir->type->vector_elements) - 1;
-
- for (int i = 0; i < ir->type->vector_elements; i++) {
- if (!(remaining_writemask & (1 << i)))
- continue;
-
- dst->writemask = 1 << i;
- dst->type = brw_type_for_base_type(ir->type);
-
- /* Find other components that match the one we're about to
- * write. Emits fewer instructions for things like vec4(0.5,
- * 1.5, 1.5, 1.5).
- */
- for (int j = i + 1; j < ir->type->vector_elements; j++) {
- if (ir->type->base_type == GLSL_TYPE_BOOL) {
- if (ir->value.b[i] == ir->value.b[j])
- dst->writemask |= (1 << j);
- } else {
- /* u, i, and f storage all line up, so no need for a
- * switch case for comparing each type.
- */
- if (ir->value.u[i] == ir->value.u[j])
- dst->writemask |= (1 << j);
- }
- }
-
- switch (ir->type->base_type) {
- case GLSL_TYPE_FLOAT:
- emit(MOV(*dst, src_reg(ir->value.f[i])));
- break;
- case GLSL_TYPE_INT:
- emit(MOV(*dst, src_reg(ir->value.i[i])));
- break;
- case GLSL_TYPE_UINT:
- emit(MOV(*dst, src_reg(ir->value.u[i])));
- break;
- case GLSL_TYPE_BOOL:
- emit(MOV(*dst,
- src_reg(ir->value.b[i] != 0 ? ctx->Const.UniformBooleanTrue
- : 0)));
- break;
- default:
- unreachable("Non-float/uint/int/bool constant");
- }
-
- remaining_writemask &= ~dst->writemask;
- }
- dst->reg_offset++;
-}
-
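The matching pass above is why vec4(0.5, 1.5, 1.5, 1.5) costs two MOVs rather
than four:

    MOV dst.x,   0.5F
    MOV dst.yzw, 1.5F

At i = 0 no later component matches 0.5, so only .x is written; at i = 1 the
scan folds channels 2 and 3 into the same writemask before emitting.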
-void
-vec4_visitor::visit(ir_constant *ir)
-{
- dst_reg dst = dst_reg(this, ir->type);
- this->result = src_reg(dst);
-
- emit_constant_values(&dst, ir);
-}
-
-void
-vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
-{
- ir_dereference *deref = static_cast<ir_dereference *>(
- ir->actual_parameters.get_head());
- ir_variable *location = deref->variable_referenced();
- unsigned surf_index = (prog_data->base.binding_table.abo_start +
- location->data.binding);
-
- /* Calculate the surface offset */
- src_reg offset(this, glsl_type::uint_type);
- ir_dereference_array *deref_array = deref->as_dereference_array();
- if (deref_array) {
- deref_array->array_index->accept(this);
-
- src_reg tmp(this, glsl_type::uint_type);
- emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
- emit(ADD(dst_reg(offset), tmp, location->data.atomic.offset));
- } else {
- offset = location->data.atomic.offset;
- }
-
- /* Emit the appropriate machine instruction */
- const char *callee = ir->callee->function_name();
- dst_reg dst = get_assignment_lhs(ir->return_deref, this);
-
- if (!strcmp("__intrinsic_atomic_read", callee)) {
- emit_untyped_surface_read(surf_index, dst, offset);
-
- } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
- emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
- src_reg(), src_reg());
-
- } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
- emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
- src_reg(), src_reg());
- }
-}
-
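The surface offset computed above is array_index * ATOMIC_COUNTER_SIZE plus
the counter's declared offset; e.g. assuming the usual 4-byte counters, an
access to counters[3] declared at offset 8 hits the buffer at byte
3 * 4 + 8 = 20.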
-void
-vec4_visitor::visit(ir_call *ir)
-{
- const char *callee = ir->callee->function_name();
-
- if (!strcmp("__intrinsic_atomic_read", callee) ||
- !strcmp("__intrinsic_atomic_increment", callee) ||
- !strcmp("__intrinsic_atomic_predecrement", callee)) {
- visit_atomic_counter_intrinsic(ir);
- } else {
- unreachable("Unsupported intrinsic.");
- }
-}
-
-src_reg
-vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler)
-{
- vec4_instruction *inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS);
- inst->base_mrf = 2;
- inst->mlen = 1;
- inst->dst = dst_reg(this, glsl_type::uvec4_type);
- inst->dst.writemask = WRITEMASK_XYZW;
-
- inst->src[1] = sampler;
-
- /* Parameters are: u, v, r, lod; lod will always be zero due to API restrictions */
- int param_base = inst->base_mrf;
- int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
- int zero_mask = 0xf & ~coord_mask;
-
- emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
- coordinate));
-
- emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
- src_reg(0)));
-
- emit(inst);
- return src_reg(inst->dst);
-}
-
-static bool
-is_high_sampler(struct brw_context *brw, src_reg sampler)
-{
- if (brw->gen < 8 && !brw->is_haswell)
- return false;
-
- return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
-}
-
-void
-vec4_visitor::visit(ir_texture *ir)
-{
- uint32_t sampler =
- _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
-
- ir_rvalue *nonconst_sampler_index =
- _mesa_get_sampler_array_nonconst_index(ir->sampler);
-
- /* Handle non-constant sampler array indexing */
- src_reg sampler_reg;
- if (nonconst_sampler_index) {
- /* The highest sampler which may be used by this operation is
- * the last element of the array. Mark it here, because the generator
- * doesn't have enough information to determine the bound.
- */
- uint32_t array_size = ir->sampler->as_dereference_array()
- ->array->type->array_size();
-
- uint32_t max_used = sampler + array_size - 1;
- if (ir->op == ir_tg4 && brw->gen < 8) {
- max_used += prog_data->base.binding_table.gather_texture_start;
- } else {
- max_used += prog_data->base.binding_table.texture_start;
- }
-
- brw_mark_surface_used(&prog_data->base, max_used);
-
- /* Emit code to evaluate the actual indexing expression */
- nonconst_sampler_index->accept(this);
- dst_reg temp(this, glsl_type::uint_type);
- emit(ADD(temp, this->result, src_reg(sampler)))
- ->force_writemask_all = true;
- sampler_reg = src_reg(temp);
- } else {
- /* Single sampler, or constant array index; the indexing expression
- * is just an immediate.
- */
- sampler_reg = src_reg(sampler);
- }
-
- /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
- * emitting anything other than setting up the constant result.
- */
- if (ir->op == ir_tg4) {
- ir_constant *chan = ir->lod_info.component->as_constant();
- int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
- if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
- dst_reg result(this, ir->type);
- this->result = src_reg(result);
- emit(MOV(result, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
- return;
- }
- }
-
- /* Should be lowered by do_lower_texture_projection */
- assert(!ir->projector);
-
- /* Should be lowered */
- assert(!ir->offset || !ir->offset->type->is_array());
-
- /* Generate code to compute all the subexpression trees. This has to be
- * done before loading any values into MRFs for the sampler message since
- * generating these values may involve SEND messages that need the MRFs.
- */
- src_reg coordinate;
- if (ir->coordinate) {
- ir->coordinate->accept(this);
- coordinate = this->result;
- }
-
- src_reg shadow_comparitor;
- if (ir->shadow_comparitor) {
- ir->shadow_comparitor->accept(this);
- shadow_comparitor = this->result;
- }
-
- bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
- src_reg offset_value;
- if (has_nonconstant_offset) {
- ir->offset->accept(this);
- offset_value = src_reg(this->result);
- }
-
- const glsl_type *lod_type = NULL, *sample_index_type = NULL;
- src_reg lod, dPdx, dPdy, sample_index, mcs;
- switch (ir->op) {
- case ir_tex:
- lod = src_reg(0.0f);
- lod_type = glsl_type::float_type;
- break;
- case ir_txf:
- case ir_txl:
- case ir_txs:
- ir->lod_info.lod->accept(this);
- lod = this->result;
- lod_type = ir->lod_info.lod->type;
- break;
- case ir_query_levels:
- lod = src_reg(0);
- lod_type = glsl_type::int_type;
- break;
- case ir_txf_ms:
- ir->lod_info.sample_index->accept(this);
- sample_index = this->result;
- sample_index_type = ir->lod_info.sample_index->type;
-
- if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
- mcs = emit_mcs_fetch(ir, coordinate, sampler_reg);
- else
- mcs = src_reg(0u);
- break;
- case ir_txd:
- ir->lod_info.grad.dPdx->accept(this);
- dPdx = this->result;
-
- ir->lod_info.grad.dPdy->accept(this);
- dPdy = this->result;
-
- lod_type = ir->lod_info.grad.dPdx->type;
- break;
- case ir_txb:
- case ir_lod:
- case ir_tg4:
- break;
- }
-
- enum opcode opcode;
- switch (ir->op) {
- case ir_tex: opcode = SHADER_OPCODE_TXL; break;
- case ir_txl: opcode = SHADER_OPCODE_TXL; break;
- case ir_txd: opcode = SHADER_OPCODE_TXD; break;
- case ir_txf: opcode = SHADER_OPCODE_TXF; break;
- case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
- case ir_txs: opcode = SHADER_OPCODE_TXS; break;
- case ir_tg4: opcode = has_nonconstant_offset
- ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
- case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
- case ir_txb:
- unreachable("TXB is not valid for vertex shaders.");
- case ir_lod:
- unreachable("LOD is not valid for vertex shaders.");
- default:
- unreachable("Unrecognized tex op");
- }
-
- vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode);
-
- if (ir->offset != NULL && ir->op != ir_txf)
- inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant());
-
- /* Stuff the channel select bits in the top of the texture offset */
- if (ir->op == ir_tg4)
- inst->texture_offset |= gather_channel(ir, sampler) << 16;
-
- /* The message header is necessary for:
- * - Gen4 (always)
- * - Texel offsets
- * - Gather channel selection
- * - Sampler indices too large to fit in a 4-bit value.
- */
- inst->header_present =
- brw->gen < 5 || inst->texture_offset != 0 || ir->op == ir_tg4 ||
- is_high_sampler(brw, sampler_reg);
- inst->base_mrf = 2;
- inst->mlen = inst->header_present + 1; /* always at least one */
- inst->dst = dst_reg(this, ir->type);
- inst->dst.writemask = WRITEMASK_XYZW;
- inst->shadow_compare = ir->shadow_comparitor != NULL;
-
- inst->src[1] = sampler_reg;
-
- /* MRF for the first parameter */
- int param_base = inst->base_mrf + inst->header_present;
- if (ir->op == ir_txs || ir->op == ir_query_levels) {
- int writemask = brw->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
- emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
} else {
- /* Load the coordinate */
- /* FINISHME: gl_clamp_mask and saturate */
- int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
- int zero_mask = 0xf & ~coord_mask;
-
- emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
- coordinate));
-
- if (zero_mask != 0) {
- emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
- src_reg(0)));
- }
- /* Load the shadow comparitor */
- if (ir->shadow_comparitor && ir->op != ir_txd && (ir->op != ir_tg4 || !has_nonconstant_offset)) {
- emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
- WRITEMASK_X),
- shadow_comparitor));
- inst->mlen++;
- }
-
- /* Load the LOD info */
- if (ir->op == ir_tex || ir->op == ir_txl) {
- int mrf, writemask;
- if (brw->gen >= 5) {
- mrf = param_base + 1;
- if (ir->shadow_comparitor) {
- writemask = WRITEMASK_Y;
- /* mlen already incremented */
- } else {
- writemask = WRITEMASK_X;
- inst->mlen++;
- }
- } else /* brw->gen == 4 */ {
- mrf = param_base;
- writemask = WRITEMASK_W;
- }
- emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
- } else if (ir->op == ir_txf) {
- emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
- } else if (ir->op == ir_txf_ms) {
- emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
- sample_index));
- if (brw->gen >= 7) {
- /* MCS data is in the first channel of `mcs`, but we need to get it into
- * the .y channel of the second vec4 of params, so replicate .x across
- * the whole vec4 and then mask off everything except .y
- */
- mcs.swizzle = BRW_SWIZZLE_XXXX;
- emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
- mcs));
- }
- inst->mlen++;
- } else if (ir->op == ir_txd) {
- const glsl_type *type = lod_type;
-
- if (brw->gen >= 5) {
- dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
- dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
- emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
- emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
- inst->mlen++;
-
- if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
- dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
- dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
- emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
- emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
- inst->mlen++;
-
- if (ir->shadow_comparitor) {
- emit(MOV(dst_reg(MRF, param_base + 2,
- ir->shadow_comparitor->type, WRITEMASK_Z),
- shadow_comparitor));
- }
- }
- } else /* brw->gen == 4 */ {
- emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
- emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
- inst->mlen += 2;
- }
- } else if (ir->op == ir_tg4 && has_nonconstant_offset) {
- if (ir->shadow_comparitor) {
- emit(MOV(dst_reg(MRF, param_base, ir->shadow_comparitor->type, WRITEMASK_W),
- shadow_comparitor));
- }
-
- emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
- offset_value));
- inst->mlen++;
- }
- }
-
- emit(inst);
-
- /* Fix up the number of layers (.z) for cube arrays: the hardware
- * returns faces * layers, but the spec requires layers.
- */
- if (ir->op == ir_txs) {
- glsl_type const *type = ir->sampler->type;
- if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
- type->sampler_array) {
- emit_math(SHADER_OPCODE_INT_QUOTIENT,
- writemask(inst->dst, WRITEMASK_Z),
- src_reg(inst->dst), src_reg(6));
- }
- }
-
- if (brw->gen == 6 && ir->op == ir_tg4) {
- emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
- }
-
- swizzle_result(ir, src_reg(inst->dst), sampler);
-}
-
-/**
- * Apply workarounds for Gen6 gather with UINT/SINT
- */
-void
-vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
-{
- if (!wa)
+ backend_visitor::visit(ir);
return;
-
- int width = (wa & WA_8BIT) ? 8 : 16;
- dst_reg dst_f = dst;
- dst_f.type = BRW_REGISTER_TYPE_F;
-
- /* Convert from UNORM to UINT */
- emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1))));
- emit(MOV(dst, src_reg(dst_f)));
-
- if (wa & WA_SIGN) {
- /* Reinterpret the UINT value as a signed INT value by
- * shifting the sign bit into place, then shifting back
- * preserving sign.
- */
- emit(SHL(dst, src_reg(dst), src_reg(32 - width)));
- emit(ASR(dst, src_reg(dst), src_reg(32 - width)));
}
-}
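The SHL/ASR pair is the classic sign-extension idiom: shift the w-bit value up
so its sign bit lands in bit 31, then arithmetic-shift back down. A sketch of
the 8-bit case (the cast dance sidesteps C++'s undefined signed left shift;
the hardware ASR itself is well defined):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
       int32_t v = 0xAB;   /* 8-bit sample with the sign bit set */
       int w = 8;
       int32_t s = (int32_t)((uint32_t)v << (32 - w)) >> (32 - w);
       printf("%d\n", s);  /* -85, i.e. 0xAB reinterpreted as signed */
    }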
-/**
- * Set up the gather channel based on the swizzle, for gather4.
- */
-uint32_t
-vec4_visitor::gather_channel(ir_texture *ir, uint32_t sampler)
-{
- ir_constant *chan = ir->lod_info.component->as_constant();
- int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
- switch (swiz) {
- case SWIZZLE_X: return 0;
- case SWIZZLE_Y:
- /* gather4 sampler is broken for green channel on RG32F --
- * we must ask for blue instead.
- */
- if (key->tex.gather_channel_quirk_mask & (1<<sampler))
- return 2;
- return 1;
- case SWIZZLE_Z: return 2;
- case SWIZZLE_W: return 3;
- default:
- unreachable("Not reached"); /* zero, one swizzles handled already */
- }
-}
-
-void
-vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler)
-{
- int s = key->tex.swizzles[sampler];
-
- this->result = src_reg(this, ir->type);
- dst_reg swizzled_result(this->result);
-
- if (ir->op == ir_query_levels) {
- /* # levels is in .w */
- orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
- emit(MOV(swizzled_result, orig_val));
- return;
- }
-
- if (ir->op == ir_txs || ir->type == glsl_type::float_type
- || s == SWIZZLE_NOOP || ir->op == ir_tg4) {
- emit(MOV(swizzled_result, orig_val));
- return;
- }
-
- int zero_mask = 0, one_mask = 0, copy_mask = 0;
- int swizzle[4] = {0};
-
- for (int i = 0; i < 4; i++) {
- switch (GET_SWZ(s, i)) {
- case SWIZZLE_ZERO:
- zero_mask |= (1 << i);
- break;
- case SWIZZLE_ONE:
- one_mask |= (1 << i);
- break;
- default:
- copy_mask |= (1 << i);
- swizzle[i] = GET_SWZ(s, i);
- break;
- }
- }
-
- if (copy_mask) {
- orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
- swizzled_result.writemask = copy_mask;
- emit(MOV(swizzled_result, orig_val));
- }
-
- if (zero_mask) {
- swizzled_result.writemask = zero_mask;
- emit(MOV(swizzled_result, src_reg(0.0f)));
- }
-
- if (one_mask) {
- swizzled_result.writemask = one_mask;
- emit(MOV(swizzled_result, src_reg(1.0f)));
- }
-}
-
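The zero/one/copy decomposition turns an arbitrary texture swizzle into at
most three MOVs. For a swizzle of (ZERO, G, ONE, R):

    MOV result.yw, orig.xyxx   // copy_mask = yw; .y <- g, .w <- r
    MOV result.x,  0.0F        // zero_mask
    MOV result.z,  1.0F        // one_mask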
-void
-vec4_visitor::visit(ir_return *)
-{
- unreachable("not reached");
+ reg->type = brw_type_for_base_type(ir->type);
+ hash_table_insert(this->variable_ht, reg, ir);
}
void
@@ -2801,35 +350,6 @@ vec4_visitor::visit(ir_discard *)
}
void
-vec4_visitor::visit(ir_if *ir)
-{
- /* Don't point the annotation at the if statement, because then it plus
- * the then and else blocks get printed.
- */
- this->base_ir = ir->condition;
-
- if (brw->gen == 6) {
- emit_if_gen6(ir);
- } else {
- enum brw_predicate predicate;
- emit_bool_to_cond_code(ir->condition, &predicate);
- emit(IF(predicate));
- }
-
- visit_instructions(&ir->then_instructions);
-
- if (!ir->else_instructions.is_empty()) {
- this->base_ir = ir->condition;
- emit(BRW_OPCODE_ELSE);
-
- visit_instructions(&ir->else_instructions);
- }
-
- this->base_ir = ir->condition;
- emit(BRW_OPCODE_ENDIF);
-}
-
-void
vec4_visitor::visit(ir_emit_vertex *)
{
unreachable("not reached");
@@ -2842,55 +362,6 @@ vec4_visitor::visit(ir_end_primitive *)
}
void
-vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
- dst_reg dst, src_reg offset,
- src_reg src0, src_reg src1)
-{
- unsigned mlen = 0;
-
- /* Set the atomic operation offset. */
- emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
- mlen++;
-
- /* Set the atomic operation arguments. */
- if (src0.file != BAD_FILE) {
- emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
- mlen++;
- }
-
- if (src1.file != BAD_FILE) {
- emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
- mlen++;
- }
-
- /* Emit the instruction. Note that this maps to the normal SIMD8
- * untyped atomic message on Ivy Bridge, but that's OK because
- * unused channels will be masked out.
- */
- vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
- src_reg(atomic_op), src_reg(surf_index));
- inst->base_mrf = 0;
- inst->mlen = mlen;
-}
-
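The payload assembled above uses one MRF per operand, each value in the .x
channel: m0.x carries the surface offset, then m1.x/m2.x carry src0/src1 when
present. BRW_AOP_INC and BRW_AOP_PREDEC take no data arguments, so for the
atomic counter intrinsics mlen stays at 1.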
-void
-vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
- src_reg offset)
-{
- /* Set the surface read offset. */
- emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
-
- /* Emit the instruction. Note that this maps to the normal SIMD8
- * untyped surface read message, but that's OK because unused
- * channels will be masked out.
- */
- vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ,
- dst, src_reg(surf_index));
- inst->base_mrf = 0;
- inst->mlen = 1;
-}
-
-void
vec4_visitor::emit_ndc_computation()
{
/* Get the position */
@@ -2900,17 +371,17 @@ vec4_visitor::emit_ndc_computation()
dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
output_reg[BRW_VARYING_SLOT_NDC] = ndc;
- current_annotation = "NDC";
+ bld.set_annotation("NDC");
dst_reg ndc_w = ndc;
ndc_w.writemask = WRITEMASK_W;
src_reg pos_w = pos;
pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
- emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
+ bld.emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
dst_reg ndc_xyz = ndc;
ndc_xyz.writemask = WRITEMASK_XYZ;
- emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
+ bld.MUL(ndc_xyz, pos, src_reg(ndc_w));
}
void
@@ -2923,29 +394,29 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
dst_reg header1_w = header1;
header1_w.writemask = WRITEMASK_W;
- emit(MOV(header1, 0u));
+ bld.MOV(header1, 0u);
if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
- current_annotation = "Point size";
- emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
- emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
+ bld.set_annotation("Point size");
+ bld.MUL(header1_w, psiz, src_reg((float)(1 << 11)));
+ bld.AND(header1_w, src_reg(header1_w), 0x7ff << 8);
}
if (key->userclip_active) {
- current_annotation = "Clipping flags";
+ bld.set_annotation("Clipping flags");
dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
- emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
- emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
- emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
+ bld.CMP(bld.reg_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L);
+ bld.emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
+ bld.OR(header1_w, src_reg(header1_w), src_reg(flags0));
- emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
- emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
- emit(SHL(flags1, src_reg(flags1), src_reg(4)));
- emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
+ bld.CMP(bld.reg_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L);
+ bld.emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
+ bld.SHL(flags1, src_reg(flags1), src_reg(4));
+ bld.OR(header1_w, src_reg(header1_w), src_reg(flags1));
}
/* i965 clipping workaround:
@@ -2960,35 +431,35 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
if (brw->has_negative_rhw_bug) {
src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
ndc_w.swizzle = BRW_SWIZZLE_WWWW;
- emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
+ bld.CMP(bld.reg_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L);
vec4_instruction *inst;
- inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
+ inst = bld.OR(header1_w, src_reg(header1_w), src_reg(1u << 6));
inst->predicate = BRW_PREDICATE_NORMAL;
- inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
+ inst = bld.MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f));
inst->predicate = BRW_PREDICATE_NORMAL;
}
- emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
+ bld.MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1));
} else if (brw->gen < 6) {
- emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
+ bld.MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u);
} else {
- emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
+ bld.MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
dst_reg reg_w = reg;
reg_w.writemask = WRITEMASK_W;
- emit(MOV(reg_w, src_reg(output_reg[VARYING_SLOT_PSIZ])));
+ bld.MOV(reg_w, src_reg(output_reg[VARYING_SLOT_PSIZ]));
}
if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
dst_reg reg_y = reg;
reg_y.writemask = WRITEMASK_Y;
reg_y.type = BRW_REGISTER_TYPE_D;
- emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
+ bld.MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER]));
}
if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
dst_reg reg_z = reg;
reg_z.writemask = WRITEMASK_Z;
reg_z.type = BRW_REGISTER_TYPE_D;
- emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
+ bld.MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT]));
}
}
}
@@ -3016,9 +487,9 @@ vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
++i) {
reg.writemask = 1 << i;
- emit(DP4(reg,
+ bld.DP4(reg,
src_reg(output_reg[clip_vertex]),
- src_reg(this->userplane[i + offset])));
+ src_reg(this->userplane[i + offset]));
}
}
@@ -3027,10 +498,10 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
{
assert (varying < VARYING_SLOT_MAX);
reg.type = output_reg[varying].type;
- current_annotation = output_reg_annotation[varying];
+ bld.set_annotation(output_reg_annotation[varying]);
/* Copy the register, saturating if necessary */
- vec4_instruction *inst = emit(MOV(reg,
- src_reg(output_reg[varying])));
+ vec4_instruction *inst = bld.MOV(reg,
+ src_reg(output_reg[varying]));
if ((varying == VARYING_SLOT_COL0 ||
varying == VARYING_SLOT_COL1 ||
varying == VARYING_SLOT_BFC0 ||
@@ -3049,17 +520,17 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
case VARYING_SLOT_PSIZ:
{
/* PSIZ is always in slot 0, and is coupled with other flags. */
- current_annotation = "indices, point width, clip flags";
+ bld.set_annotation("indices, point width, clip flags");
emit_psiz_and_flags(reg);
break;
}
case BRW_VARYING_SLOT_NDC:
- current_annotation = "NDC";
- emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
+ bld.set_annotation("NDC");
+ bld.MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC]));
break;
case VARYING_SLOT_POS:
- current_annotation = "gl_Position";
- emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
+ bld.set_annotation("gl_Position");
+ bld.MOV(reg, src_reg(output_reg[VARYING_SLOT_POS]));
break;
case VARYING_SLOT_EDGE:
/* This is present when doing unfilled polygons. We're supposed to copy
@@ -3068,9 +539,9 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
* of that attribute (starts as 1.0f). This is then used in clipping to
* determine which edges should be drawn as wireframe.
*/
- current_annotation = "edge flag";
- emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
- glsl_type::float_type, WRITEMASK_XYZW))));
+ bld.set_annotation("edge flag");
+ bld.MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
+ glsl_type::float_type, WRITEMASK_XYZW)));
break;
case BRW_VARYING_SLOT_PAD:
/* No need to write to this slot */
@@ -3138,7 +609,7 @@ vec4_visitor::emit_vertex()
/* Lower legacy ff and ClipVertex clipping to clip distances */
if (key->userclip_active && !prog->UsesClipDistanceOut) {
- current_annotation = "user clip distances";
+ bld.set_annotation("user clip distances");
output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
@@ -3173,7 +644,7 @@ vec4_visitor::emit_vertex()
}
complete = slot >= prog_data->vue_map.num_slots;
- current_annotation = "URB write";
+ bld.set_annotation("URB write");
vec4_instruction *inst = emit_urb_write_opcode(complete);
inst->base_mrf = base_mrf;
inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
@@ -3198,44 +669,14 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
message_header_scale *= 16;
if (reladdr) {
+ vec4_builder ibld = bld.at(block, inst);
src_reg index = src_reg(this, glsl_type::int_type);
- emit_before(block, inst, ADD(dst_reg(index), *reladdr,
- src_reg(reg_offset)));
- emit_before(block, inst, MUL(dst_reg(index), index,
- src_reg(message_header_scale)));
-
- return index;
- } else {
- return src_reg(reg_offset * message_header_scale);
- }
-}
-
-src_reg
-vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
- src_reg *reladdr, int reg_offset)
-{
- if (reladdr) {
- src_reg index = src_reg(this, glsl_type::int_type);
-
- emit_before(block, inst, ADD(dst_reg(index), *reladdr,
- src_reg(reg_offset)));
-
- /* Pre-gen6, the message header uses byte offsets instead of vec4
- * (16-byte) offset units.
- */
- if (brw->gen < 6) {
- emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16)));
- }
+ ibld.ADD(dst_reg(index), *reladdr, src_reg(reg_offset));
+ ibld.MUL(dst_reg(index), index, src_reg(message_header_scale));
return index;
- } else if (brw->gen >= 8) {
- /* Store the offset in a GRF so we can send-from-GRF. */
- src_reg offset = src_reg(this, glsl_type::int_type);
- emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset)));
- return offset;
} else {
- int message_header_scale = brw->gen < 6 ? 16 : 1;
return src_reg(reg_offset * message_header_scale);
}
}
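For the reladdr path the two builder instructions compute
index = (reladdr + reg_offset) * message_header_scale; the scale picks up the
extra * 16 pre-gen6 because those scratch messages address in bytes rather
than vec4 units. So, assuming a base scale of 2 for the interleaved layout
(set earlier in this function, outside the hunk), element i of an array
spilled at slot base lands at (i + base) * 32 bytes on gen4/5.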
@@ -3251,11 +692,12 @@ vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
dst_reg temp, src_reg orig_src,
int base_offset)
{
+ vec4_builder ibld = bld.at(block, inst);
int reg_offset = base_offset + orig_src.reg_offset;
src_reg index = get_scratch_offset(block, inst, orig_src.reladdr,
reg_offset);
- emit_before(block, inst, SCRATCH_READ(temp, index));
+ SCRATCH_READ(ibld, temp, index);
}
/**
@@ -3291,13 +733,13 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
swizzles[2], swizzles[3]);
+ vec4_builder ibld = bld.at(block, (vec4_instruction *)inst->next);
dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
inst->dst.writemask));
- vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
- write->predicate = inst->predicate;
- write->ir = inst->ir;
- write->annotation = inst->annotation;
- inst->insert_after(block, write);
+ ibld.set_base_ir(inst->ir);
+ ibld.set_annotation(inst->annotation);
+ exec_predicate(inst->predicate,
+ SCRATCH_WRITE(ibld, dst, temp, index));
inst->dst.file = temp.file;
inst->dst.reg = temp.reg;
@@ -3346,8 +788,8 @@ vec4_visitor::move_grf_array_access_to_scratch()
*/
foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
/* Set up the annotation tracking for new generated instructions. */
- base_ir = inst->ir;
- current_annotation = inst->annotation;
+ bld.set_base_ir(inst->ir);
+ bld.set_annotation(inst->annotation);
if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
@@ -3371,37 +813,6 @@ vec4_visitor::move_grf_array_access_to_scratch()
}
/**
- * Emits an instruction before @inst to load the value named by @orig_src
- * from the pull constant buffer (surface) at @base_offset to @temp.
- */
-void
-vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
- dst_reg temp, src_reg orig_src,
- int base_offset)
-{
- int reg_offset = base_offset + orig_src.reg_offset;
- src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
- src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
- reg_offset);
- vec4_instruction *load;
-
- if (brw->gen >= 7) {
- dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
- grf_offset.type = offset.type;
- emit_before(block, inst, MOV(grf_offset, offset));
-
- load = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
- temp, index, src_reg(grf_offset));
- } else {
- load = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
- temp, index, offset);
- load->base_mrf = 14;
- load->mlen = 1;
- }
- emit_before(block, inst, load);
-}
-
-/**
* Implements array access of uniforms by inserting a
* PULL_CONSTANT_LOAD instruction.
*
@@ -3430,7 +841,7 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
continue;
- int uniform = inst->src[i].reg;
+ unsigned uniform = inst->src[i].reg;
/* If this array isn't already present in the pull constant buffer,
* add it.
@@ -3449,13 +860,16 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
}
/* Set up the annotation tracking for new generated instructions. */
- base_ir = inst->ir;
- current_annotation = inst->annotation;
+ bld.set_base_ir(inst->ir);
+ bld.set_annotation(inst->annotation);
- dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+ vec4_builder ibld = bld.at(block, inst);
+ int loc = pull_constant_loc[uniform] + inst->src[i].reg_offset;
+ src_reg surf_index(prog_data->base.binding_table.pull_constants_start);
+ dst_reg temp = bld.natural_reg(BRW_REGISTER_TYPE_F);
- emit_pull_constant_load(block, inst, temp, inst->src[i],
- pull_constant_loc[uniform]);
+ emit_pull_constant_load(ibld, temp, surf_index, 16 * loc,
+ inst->src[i].reladdr, 4);
inst->src[i].file = temp.file;
inst->src[i].reg = temp.reg;
@@ -3472,16 +886,162 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
split_uniform_registers();
}
-void
-vec4_visitor::resolve_ud_negate(src_reg *reg)
+static bool
+is_high_sampler(struct brw_context *brw, src_reg sampler)
{
- if (reg->type != BRW_REGISTER_TYPE_UD ||
- !reg->negate)
- return;
+ if (brw->gen < 8 && !brw->is_haswell)
+ return false;
- src_reg temp = src_reg(this, glsl_type::uvec4_type);
- emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
- *reg = temp;
+ return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
+}
+
+vec4_instruction *
+vec4_visitor::emit_texture(ir_texture *ir, const dst_reg &dst,
+ const src_reg &coordinate, const src_reg &shadow_c,
+ const src_reg &lod, const src_reg &lod2,
+ const src_reg &offset_val, const src_reg &sample_index,
+ const src_reg &mcs, const src_reg &sampler)
+{
+ const bool has_nonconstant_offset = (offset_val.file != BAD_FILE);
+ enum opcode opcode;
+
+ switch (ir->op) {
+ case ir_tex: opcode = SHADER_OPCODE_TXL; break;
+ case ir_txl: opcode = SHADER_OPCODE_TXL; break;
+ case ir_txd: opcode = SHADER_OPCODE_TXD; break;
+ case ir_txf: opcode = SHADER_OPCODE_TXF; break;
+ case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
+ case ir_txs: opcode = SHADER_OPCODE_TXS; break;
+ case ir_tg4: opcode = has_nonconstant_offset
+ ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
+ case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
+ case ir_txb:
+ unreachable("TXB is not valid for vertex shaders.");
+ case ir_lod:
+ unreachable("LOD is not valid for vertex shaders.");
+ default:
+ unreachable("Unrecognized tex op");
+ }
+
+ vec4_instruction inst(opcode, dst, src_reg(), sampler);
+
+ /* The message header is necessary for:
+ * - Gen4 (always)
+ * - Texel offsets
+ * - Gather channel selection
+ * - Sampler indices too large to fit in a 4-bit value.
+ */
+ inst.header_present =
+ brw->gen < 5 || inst.texture_offset != 0 || ir->op == ir_tg4 ||
+ is_high_sampler(brw, sampler);
+ inst.base_mrf = 2;
+ inst.mlen = inst.header_present + 1; /* always at least one */
+
+ /* MRF for the first parameter */
+ dst_reg payload = dst_reg(MRF, inst.base_mrf + inst.header_present);
+
+ if (ir->op == ir_txs || ir->op == ir_query_levels) {
+ const unsigned mask = brw->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
+ bld.MOV(writemask(retype(payload, lod.type), mask), lod);
+ } else {
+ /* Load the coordinate */
+ /* FINISHME: gl_clamp_mask and saturate */
+ int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
+ int zero_mask = 0xf & ~coord_mask;
+
+ bld.MOV(writemask(retype(payload, coordinate.type), coord_mask),
+ coordinate);
+
+ if (zero_mask != 0)
+ bld.MOV(writemask(retype(payload, coordinate.type), zero_mask),
+ src_reg(0));
+
+ /* Load the shadow comparitor */
+ if (ir->shadow_comparitor && ir->op != ir_txd && (ir->op != ir_tg4 || !has_nonconstant_offset)) {
+ bld.MOV(writemask(offset(retype(payload, shadow_c.type), 1),
+ WRITEMASK_X),
+ shadow_c);
+ inst.mlen++;
+ }
+
+ /* Load the LOD info */
+ if (ir->op == ir_tex || ir->op == ir_txl) {
+ dst_reg mrf;
+ unsigned mask;
+ if (brw->gen >= 5) {
+ mrf = offset(payload, 1);
+ if (ir->shadow_comparitor) {
+ mask = WRITEMASK_Y;
+ /* mlen already incremented */
+ } else {
+ mask = WRITEMASK_X;
+ inst.mlen++;
+ }
+ } else /* brw->gen == 4 */ {
+ mrf = payload;
+ mask = WRITEMASK_W;
+ }
+ bld.MOV(writemask(retype(mrf, lod.type), mask), lod);
+ } else if (ir->op == ir_txf) {
+ bld.MOV(writemask(retype(payload, lod.type), WRITEMASK_W),
+ lod);
+ } else if (ir->op == ir_txf_ms) {
+ bld.MOV(writemask(retype(offset(payload, 1), sample_index.type),
+ WRITEMASK_X), sample_index);
+ if (brw->gen >= 7) {
+ /* MCS data is in the first channel of `mcs`, but we need to get it into
+ * the .y channel of the second vec4 of params, so replicate .x across
+ * the whole vec4 and then mask off everything except .y
+ */
+ bld.MOV(writemask(retype(offset(payload, 1), BRW_REGISTER_TYPE_UD),
+ WRITEMASK_Y),
+ swizzle(mcs, BRW_SWIZZLE_XXXX));
+ }
+ inst.mlen++;
+ } else if (ir->op == ir_txd) {
+ dst_reg mrf = retype(payload, lod.type);
+
+ if (brw->gen >= 5) {
+ bld.MOV(writemask(offset(mrf, 1), WRITEMASK_XZ),
+ swizzle(lod, BRW_SWIZZLE_XXYY));
+ bld.MOV(writemask(offset(mrf, 1), WRITEMASK_YW),
+ swizzle(lod2, BRW_SWIZZLE_XXYY));
+ inst.mlen++;
+
+ if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
+ bld.MOV(writemask(offset(mrf, 2), WRITEMASK_X),
+ swizzle(lod, BRW_SWIZZLE_ZZZZ));
+ bld.MOV(writemask(offset(mrf, 2), WRITEMASK_Y),
+ swizzle(lod2, BRW_SWIZZLE_ZZZZ));
+ inst.mlen++;
+
+ if (ir->shadow_comparitor)
+ bld.MOV(writemask(offset(retype(payload, shadow_c.type), 2),
+ WRITEMASK_Z), shadow_c);
+ }
+ } else /* brw->gen == 4 */ {
+ bld.MOV(writemask(offset(mrf, 1), WRITEMASK_XYZ), lod);
+ bld.MOV(writemask(offset(mrf, 2), WRITEMASK_XYZ), lod2);
+ inst.mlen += 2;
+ }
+ } else if (ir->op == ir_tg4 && has_nonconstant_offset) {
+ if (ir->shadow_comparitor)
+ bld.MOV(writemask(retype(payload, shadow_c.type),
+ WRITEMASK_W), shadow_c);
+
+ bld.MOV(writemask(retype(offset(payload, 1), BRW_REGISTER_TYPE_D),
+ WRITEMASK_XY), offset_val);
+ inst.mlen++;
+ }
+ }
+
+ return bld.emit(inst);
+}
+
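The new helper writes one vec4 MRF per parameter, starting just past the
optional header. As a sketch of the gen5+ TXL-with-shadow case (register
numbers assume base_mrf = 2 and no header, so the payload starts at m2):

    m2: coordinate in .xyz, zero fill in .w   // coord_mask / zero_mask
    m3: shadow_c in .x, lod in .y             // one shared vec4

which is why the LOD store uses WRITEMASK_Y without bumping mlen again: the
comparitor already accounted for that register.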
+src_reg
+vec4_visitor::emit_untyped_surface_header()
+{
+ return src_reg();
}
vec4_visitor::vec4_visitor(struct brw_context *brw,
@@ -3494,81 +1054,26 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
void *mem_ctx,
bool debug_flag,
bool no_spills,
- shader_time_shader_type st_base,
- shader_time_shader_type st_written,
- shader_time_shader_type st_reset)
- : backend_visitor(brw, shader_prog, prog, &prog_data->base, stage),
+ shader_time_shader_type st_type)
+ : backend_visitor(brw, shader_prog, prog, &prog_data->base, mem_ctx, stage,
+ debug_flag, false,
+ vec4_builder(brw, mem_ctx, alloc, instructions),
+ st_type,
+ /* Initialize uniform_array_size to at least 1 because
+ * pre-gen6 VS requires at least one. See
+ * setup_uniforms() in brw_vec4.cpp.
+ */
+ MAX2(prog_data->base.nr_params, 1)),
c(c),
key(key),
prog_data(prog_data),
sanity_param_count(0),
- fail_msg(NULL),
- first_non_payload_grf(0),
need_all_constants_in_pull_buffer(false),
debug_flag(debug_flag),
- no_spills(no_spills),
- st_base(st_base),
- st_written(st_written),
- st_reset(st_reset)
+ no_spills(no_spills)
{
- this->mem_ctx = mem_ctx;
- this->failed = false;
-
- this->base_ir = NULL;
- this->current_annotation = NULL;
memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
-
- this->variable_ht = hash_table_ctor(0,
- hash_table_pointer_hash,
- hash_table_pointer_compare);
-
- this->virtual_grf_start = NULL;
- this->virtual_grf_end = NULL;
this->live_intervals_valid = false;
-
- this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
-
- this->uniforms = 0;
-
- /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
- * at least one. See setup_uniforms() in brw_vec4.cpp.
- */
- this->uniform_array_size = 1;
- if (prog_data) {
- this->uniform_array_size = MAX2(stage_prog_data->nr_params, 1);
- }
-
- this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
- this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
-}
-
-vec4_visitor::~vec4_visitor()
-{
- hash_table_dtor(this->variable_ht);
-}
-
-
-void
-vec4_visitor::fail(const char *format, ...)
-{
- va_list va;
- char *msg;
-
- if (failed)
- return;
-
- failed = true;
-
- va_start(va, format);
- msg = ralloc_vasprintf(mem_ctx, format, va);
- va_end(va);
- msg = ralloc_asprintf(mem_ctx, "vec4 compile failed: %s\n", msg);
-
- this->fail_msg = msg;
-
- if (debug_flag) {
- fprintf(stderr, "%s", msg);
- }
}
} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
index 5d9027b2ea6..304d3f0015c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
@@ -43,10 +43,10 @@ vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod,
{
vec4_instruction *inst;
- inst = emit(BRW_OPCODE_CMP, dst_null_d(), src0, src1);
+ inst = bld.emit(BRW_OPCODE_CMP, bld.reg_null_d(), src0, src1);
inst->conditional_mod = conditional_mod;
- inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
+ inst = bld.emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
inst->predicate = BRW_PREDICATE_NORMAL;
}
@@ -68,11 +68,11 @@ vec4_vs_visitor::emit_program_code()
* mov.f0 dst 1.0
*/
src_reg one = src_reg(this, glsl_type::float_type);
- emit(MOV(dst_reg(one), src_reg(1.0f)));
+ bld.MOV(dst_reg(one), src_reg(1.0f));
for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
const struct prog_instruction *vpi = &prog->Instructions[insn];
- base_ir = vpi;
+ bld.set_base_ir(vpi);
dst_reg dst;
src_reg src[3];
@@ -89,11 +89,11 @@ vec4_vs_visitor::emit_program_code()
case OPCODE_ABS:
src[0].abs = true;
src[0].negate = false;
- emit(MOV(dst, src[0]));
+ bld.MOV(dst, src[0]);
break;
case OPCODE_ADD:
- emit(ADD(dst, src[0], src[1]));
+ bld.ADD(dst, src[0], src[1]);
break;
case OPCODE_ARL:
@@ -102,40 +102,40 @@ vec4_vs_visitor::emit_program_code()
dst_reg dst_f = dst;
dst_f.type = BRW_REGISTER_TYPE_F;
- emit(RNDD(dst_f, src[0]));
- emit(MOV(dst, src_reg(dst_f)));
+ bld.RNDD(dst_f, src[0]);
+ bld.MOV(dst, src_reg(dst_f));
} else {
- emit(RNDD(dst, src[0]));
+ bld.RNDD(dst, src[0]);
}
break;
case OPCODE_DP3:
- emit(DP3(dst, src[0], src[1]));
+ bld.DP3(dst, src[0], src[1]);
break;
case OPCODE_DP4:
- emit(DP4(dst, src[0], src[1]));
+ bld.DP4(dst, src[0], src[1]);
break;
case OPCODE_DPH:
- emit(DPH(dst, src[0], src[1]));
+ bld.DPH(dst, src[0], src[1]);
break;
case OPCODE_DST: {
dst_reg t = dst;
if (vpi->DstReg.WriteMask & WRITEMASK_X) {
t.writemask = WRITEMASK_X;
- emit(MOV(t, src_reg(1.0f)));
+ bld.MOV(t, src_reg(1.0f));
}
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
t.writemask = WRITEMASK_Y;
- emit(MUL(t, src[0], src[1]));
+ bld.MUL(t, src[0], src[1]);
}
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
t.writemask = WRITEMASK_Z;
- emit(MOV(t, src[0]));
+ bld.MOV(t, src[0]);
}
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
t.writemask = WRITEMASK_W;
- emit(MOV(t, src[1]));
+ bld.MOV(t, src[1]);
}
break;
}
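The per-channel writes above implement the ARB_vertex_program DST semantics,
dst = (1, src0.y * src1.y, src0.z, src1.w); e.g. DST of (_, 2, 3, _) and
(_, 5, _, 7) yields (1, 10, 3, 7).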
@@ -146,46 +146,46 @@ vec4_vs_visitor::emit_program_code()
/* tmp_d = floor(src[0].x) */
src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
assert(tmp_d.type == BRW_REGISTER_TYPE_D);
- emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)));
+ bld.RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX));
/* result[0] = 2.0 ^ tmp */
/* Adjust exponent for floating point: exp += 127 */
dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
- emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
+ bld.ADD(tmp_d_x, tmp_d, src_reg(127));
/* Install exponent and sign. Excess drops off the edge: */
dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
- emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
+ bld.emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
}
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
result.writemask = WRITEMASK_Y;
- emit(FRC(result, src[0]));
+ bld.FRC(result, src[0]);
}
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
result.writemask = WRITEMASK_Z;
- emit_math(SHADER_OPCODE_EXP2, result, src[0]);
+ bld.emit_math(SHADER_OPCODE_EXP2, result, src[0]);
}
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
result.writemask = WRITEMASK_W;
- emit(MOV(result, src_reg(1.0f)));
+ bld.MOV(result, src_reg(1.0f));
}
break;
}
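The X-channel path builds 2^floor(x) directly in the float encoding: the ADD
installs the exponent bias (tmp + 127 = 130 for tmp = 3) and the SHL moves it
into the exponent field, 130 << 23 = 0x41000000, which is exactly 8.0f = 2^3.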
case OPCODE_EX2:
- emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
+ bld.emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
break;
case OPCODE_FLR:
- emit(RNDD(dst, src[0]));
+ bld.RNDD(dst, src[0]);
break;
case OPCODE_FRC:
- emit(FRC(dst, src[0]));
+ bld.FRC(dst, src[0]);
break;
case OPCODE_LG2:
- emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
+ bld.emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
break;
case OPCODE_LIT: {
@@ -207,36 +207,36 @@ vec4_vs_visitor::emit_program_code()
*/
if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
result.writemask = WRITEMASK_XW;
- emit(MOV(result, src_reg(1.0f)));
+ bld.MOV(result, src_reg(1.0f));
}
if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
result.writemask = WRITEMASK_YZ;
- emit(MOV(result, src_reg(0.0f)));
+ bld.MOV(result, src_reg(0.0f));
src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX);
- emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.CMP(bld.reg_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G);
+ bld.IF(BRW_PREDICATE_NORMAL);
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
result.writemask = WRITEMASK_Y;
- emit(MOV(result, tmp_x));
+ bld.MOV(result, tmp_x);
}
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
/* if (tmp.y < 0) tmp.y = 0; */
src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY);
result.writemask = WRITEMASK_Z;
- emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f));
+ bld.emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f));
src_reg clamped_y(result);
clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW);
- emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
+ bld.emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
}
break;
}
@@ -260,19 +260,19 @@ vec4_vs_visitor::emit_program_code()
*/
if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
result.writemask = WRITEMASK_X;
- emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
- emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
+ bld.AND(result, arg0_ud, src_reg((1u << 31) - 1));
+ bld.emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
src_reg result_d(result_src);
result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
result.type = BRW_REGISTER_TYPE_F;
- emit(ADD(result, result_d, src_reg(-127)));
+ bld.ADD(result, result_d, src_reg(-127));
}
if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
result.writemask = WRITEMASK_Y;
result.type = BRW_REGISTER_TYPE_UD;
- emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
- emit(OR(result, result_src, src_reg(127u << 23)));
+ bld.AND(result, arg0_ud, src_reg((1u << 23) - 1));
+ bld.OR(result, result_src, src_reg(127u << 23));
}
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
@@ -294,51 +294,51 @@ vec4_vs_visitor::emit_program_code()
result_x.swizzle = BRW_SWIZZLE_XXXX;
result_y.swizzle = BRW_SWIZZLE_YYYY;
result_z.swizzle = BRW_SWIZZLE_ZZZZ;
- emit_math(SHADER_OPCODE_LOG2, result, result_y);
- emit(ADD(result, result_z, result_x));
+ bld.emit_math(SHADER_OPCODE_LOG2, result, result_y);
+ bld.ADD(result, result_z, result_x);
}
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
result.type = BRW_REGISTER_TYPE_F;
result.writemask = WRITEMASK_W;
- emit(MOV(result, src_reg(1.0f)));
+ bld.MOV(result, src_reg(1.0f));
}
break;
}
case OPCODE_MAD: {
src_reg temp = src_reg(this, glsl_type::vec4_type);
- emit(MUL(dst_reg(temp), src[0], src[1]));
- emit(ADD(dst, temp, src[2]));
+ bld.MUL(dst_reg(temp), src[0], src[1]);
+ bld.ADD(dst, temp, src[2]);
break;
}
case OPCODE_MAX:
- emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]);
+ bld.emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]);
break;
case OPCODE_MIN:
- emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
+ bld.emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
break;
case OPCODE_MOV:
- emit(MOV(dst, src[0]));
+ bld.MOV(dst, src[0]);
break;
case OPCODE_MUL:
- emit(MUL(dst, src[0], src[1]));
+ bld.MUL(dst, src[0], src[1]);
break;
case OPCODE_POW:
- emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
+ bld.emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
break;
case OPCODE_RCP:
- emit_math(SHADER_OPCODE_RCP, dst, src[0]);
+ bld.emit_math(SHADER_OPCODE_RCP, dst, src[0]);
break;
case OPCODE_RSQ:
- emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
+ bld.emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
break;
case OPCODE_SGE:
@@ -352,7 +352,7 @@ vec4_vs_visitor::emit_program_code()
case OPCODE_SUB: {
src_reg neg_src1 = src[1];
neg_src1.negate = !src[1].negate;
- emit(ADD(dst, src[0], neg_src1));
+ bld.ADD(dst, src[0], neg_src1);
break;
}
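
OPCODE_SUB above is lowered to an ADD with the negate modifier of the second source toggled, rather than a dedicated opcode. A toy model of that source-modifier trick (struct and helpers are illustrative only):

   struct toy_src {
      float value;
      bool negate; /* flip the sign when the source is read */
   };

   static float toy_read(struct toy_src s)
   {
      return s.negate ? -s.value : s.value;
   }

   /* SUB dst, a, b  ==>  ADD dst, a, -b */
   static float toy_sub_as_add(struct toy_src a, struct toy_src b)
   {
      b.negate = !b.negate;
      return toy_read(a) + toy_read(b);
   }
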
@@ -360,21 +360,21 @@ vec4_vs_visitor::emit_program_code()
/* Note that SWZ's extended swizzles are handled in the general
* get_src_reg() code.
*/
- emit(MOV(dst, src[0]));
+ bld.MOV(dst, src[0]);
break;
case OPCODE_XPD: {
src_reg t1 = src_reg(this, glsl_type::vec4_type);
src_reg t2 = src_reg(this, glsl_type::vec4_type);
- emit(MUL(dst_reg(t1),
+ bld.MUL(dst_reg(t1),
swizzle(src[0], BRW_SWIZZLE_YZXW),
- swizzle(src[1], BRW_SWIZZLE_ZXYW)));
- emit(MUL(dst_reg(t2),
+ swizzle(src[1], BRW_SWIZZLE_ZXYW));
+ bld.MUL(dst_reg(t2),
swizzle(src[0], BRW_SWIZZLE_ZXYW),
- swizzle(src[1], BRW_SWIZZLE_YZXW)));
+ swizzle(src[1], BRW_SWIZZLE_YZXW));
t2.negate = true;
- emit(ADD(dst, t1, t2));
+ bld.ADD(dst, t1, t2);
break;
}
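
The two MULs and the negated ADD above implement the standard swizzled cross product. As a scalar cross-check (hypothetical helper; the W component is left to the writemask):

   /* cross(a, b) = a.yzx * b.zxy - a.zxy * b.yzx, matching the
    * YZXW/ZXYW swizzles used above. */
   static void xpd_reference(const float a[4], const float b[4], float dst[4])
   {
      dst[0] = a[1] * b[2] - a[2] * b[1];
      dst[1] = a[2] * b[0] - a[0] * b[2];
      dst[2] = a[0] * b[1] - a[1] * b[0];
   }
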
@@ -388,7 +388,7 @@ vec4_vs_visitor::emit_program_code()
/* Copy the temporary back into the actual destination register. */
if (vpi->Opcode != OPCODE_END) {
- emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
+ bld.MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst));
}
}
@@ -475,7 +475,7 @@ vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
}
case PROGRAM_UNDEFINED:
- return dst_null_f();
+ return bld.reg_null_f();
default:
unreachable("vec4_vp: bad destination register file");
@@ -530,10 +530,10 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
src_reg reladdr = src_reg(this, glsl_type::int_type);
dst_reg dst_reladdr = dst_reg(reladdr);
dst_reladdr.writemask = WRITEMASK_X;
- emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
+ bld.ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index));
if (brw->gen < 6)
- emit(MUL(dst_reladdr, reladdr, src_reg(16)));
+ bld.MUL(dst_reladdr, reladdr, src_reg(16));
#if 0
assert(src.Index < this->uniforms);
@@ -547,17 +547,14 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start));
vec4_instruction *load;
if (brw->gen >= 7) {
- load = new(mem_ctx)
- vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
- dst_reg(result), surf_index, reladdr);
+ load = bld.emit(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+ dst_reg(result), surf_index, reladdr);
} else {
- load = new(mem_ctx)
- vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
- dst_reg(result), surf_index, reladdr);
+ load = bld.emit(VS_OPCODE_PULL_CONSTANT_LOAD,
+ dst_reg(result), surf_index, reladdr);
load->base_mrf = 14;
load->mlen = 1;
}
- emit(load);
break;
}
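
The relative-addressing path above scales the computed index by 16 on gen < 6. The factor matches sizeof(float[4]), which suggests the older pull-constant message takes a byte offset while newer ones take a vec4 index; that reading is an inference from the multiply, sketched here with hypothetical names:

   /* Illustrative only: convert a vec4 element index into the offset
    * the pull-constant load message is assumed to expect. */
   static unsigned pull_constant_offset(unsigned index, bool gen_lt_6)
   {
      return gen_lt_6 ? index * 16 /* bytes: 4 floats * 4 bytes */ : index;
   }
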
@@ -571,7 +568,7 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
for (int i = 0; i < 4; i++) {
dst_reg t = dst_reg(result);
t.writemask = 1 << i;
- emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
+ bld.MOV(t, src_reg(plist->ParameterValues[src.Index][i].f));
}
break;
@@ -636,24 +633,24 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
if (src_mask) {
temp.writemask = src_mask;
- emit(MOV(temp, result));
+ bld.MOV(temp, result);
}
if (zeros_mask) {
temp.writemask = zeros_mask;
- emit(MOV(temp, src_reg(0.0f)));
+ bld.MOV(temp, src_reg(0.0f));
}
if (ones_mask) {
temp.writemask = ones_mask;
- emit(MOV(temp, src_reg(1.0f)));
+ bld.MOV(temp, src_reg(1.0f));
}
if (src.Negate) {
temp.writemask = src.Negate;
src_reg neg(temp_src);
neg.negate = true;
- emit(MOV(temp, neg));
+ bld.MOV(temp, neg);
}
result = temp_src;
}
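
The mask fixups above implement ARB_vertex_program extended swizzles, where each destination component can select a source component, the constant 0, or the constant 1, optionally negated. A scalar model (enum and helper are hypothetical):

   enum ext_swz { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W, SWZ_ZERO, SWZ_ONE };

   static float apply_ext_swizzle(const float src[4], enum ext_swz sel,
                                  bool negate)
   {
      float v;
      switch (sel) {
      case SWZ_ZERO: v = 0.0f; break;
      case SWZ_ONE:  v = 1.0f; break;
      default:       v = src[sel]; break;
      }
      return negate ? -v : v;
   }
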
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index 72b6ef03b42..ac544354d6a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -51,7 +51,7 @@ vec4_vs_visitor::emit_prolog()
dst_reg dst = reg;
dst.type = brw_type_for_base_type(glsl_type::vec4_type);
dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
- emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
+ bld.MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f));
}
/* Do sign recovery for 2101010 formats if required. */
@@ -59,19 +59,19 @@ vec4_vs_visitor::emit_prolog()
if (sign_recovery_shift.file == BAD_FILE) {
/* shift constant: <22,22,22,30> */
sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
- emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
- emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
+ bld.MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u));
+ bld.MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u));
}
- emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
- emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
+ bld.SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift));
+ bld.ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift));
}
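
The SHL/ASR pair above is the classic sign-extension idiom: shift the packed field to the top of the word, then arithmetic-shift it back down. With the <22,22,22,30> shift vector this recovers the signed 10/10/10/2 components. A scalar equivalent:

   /* Sign-extend a b-bit field; shift = 32 - b gives 22 for the
    * 10-bit XYZ components and 30 for the 2-bit W component. */
   static int sign_extend(unsigned raw, unsigned bits)
   {
      const unsigned shift = 32 - bits;
      return (int)(raw << shift) >> shift;
   }
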
/* Apply BGRA swizzle if required. */
if (wa_flags & BRW_ATTRIB_WA_BGRA) {
src_reg temp = src_reg(reg);
temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
- emit(MOV(reg, temp));
+ bld.MOV(reg, temp);
}
if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
@@ -87,17 +87,17 @@ vec4_vs_visitor::emit_prolog()
if (es3_normalize_factor.file == BAD_FILE) {
/* mul constant: 1 / (2^(b-1) - 1) */
es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
- emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ),
- src_reg(1.0f / ((1<<9) - 1))));
- emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W),
- src_reg(1.0f / ((1<<1) - 1))));
+ bld.MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ),
+ src_reg(1.0f / ((1<<9) - 1)));
+ bld.MOV(writemask(es3_normalize_factor, WRITEMASK_W),
+ src_reg(1.0f / ((1<<1) - 1)));
}
dst_reg dst = reg;
dst.type = brw_type_for_base_type(glsl_type::vec4_type);
- emit(MOV(dst, src_reg(reg_d)));
- emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
- emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f));
+ bld.MOV(dst, src_reg(reg_d));
+ bld.MUL(dst, src_reg(dst), src_reg(es3_normalize_factor));
+ bld.emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f));
} else {
/* The following equations are from the OpenGL 3.2 specification:
*
@@ -113,30 +113,30 @@ vec4_vs_visitor::emit_prolog()
if (normalize_factor.file == BAD_FILE) {
/* 1 / (2^b - 1) for b=<10,10,10,2> */
normalize_factor = dst_reg(this, glsl_type::vec4_type);
- emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ),
- src_reg(1.0f / ((1<<10) - 1))));
- emit(MOV(writemask(normalize_factor, WRITEMASK_W),
- src_reg(1.0f / ((1<<2) - 1))));
+ bld.MOV(writemask(normalize_factor, WRITEMASK_XYZ),
+ src_reg(1.0f / ((1<<10) - 1)));
+ bld.MOV(writemask(normalize_factor, WRITEMASK_W),
+ src_reg(1.0f / ((1<<2) - 1)));
}
dst_reg dst = reg;
dst.type = brw_type_for_base_type(glsl_type::vec4_type);
- emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
+ bld.MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud));
/* For signed normalization, we want the numerator to be 2c+1. */
if (wa_flags & BRW_ATTRIB_WA_SIGN) {
- emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
- emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
+ bld.MUL(dst, src_reg(dst), src_reg(2.0f));
+ bld.ADD(dst, src_reg(dst), src_reg(1.0f));
}
- emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
+ bld.MUL(dst, src_reg(dst), src_reg(normalize_factor));
}
}
if (wa_flags & BRW_ATTRIB_WA_SCALE) {
dst_reg dst = reg;
dst.type = brw_type_for_base_type(glsl_type::vec4_type);
- emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
+ bld.MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud));
}
}
}
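
For reference, the conversions the two normalization branches above implement, condensed into one scalar helper (hypothetical, following the equations quoted from the specs):

   /* Desktop GL signed normalization uses (2c + 1) / (2^b - 1);
    * unsigned uses c / (2^b - 1).  The ES3 branch above instead uses
    * c / (2^(b-1) - 1) with a clamp to [-1, 1], not modeled here.
    * b is 10 for xyz and 2 for w in the 2101010 formats. */
   static float normalize_component(int c, unsigned bits, bool is_signed)
   {
      const float denom = (float)((1u << bits) - 1);
      return is_signed ? (2.0f * (float)c + 1.0f) / denom
                       : (float)c / denom;
   }
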
@@ -193,7 +193,7 @@ vec4_vs_visitor::emit_urb_write_opcode(bool complete)
emit_shader_time_end();
}
- vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+ vec4_instruction *inst = bld.emit(VS_OPCODE_URB_WRITE);
inst->urb_write_flags = complete ?
BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
@@ -221,7 +221,7 @@ vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
&vs_compile->key.base, &vs_prog_data->base, prog,
MESA_SHADER_VERTEX,
mem_ctx, INTEL_DEBUG & DEBUG_VS, false /* no_spills */,
- ST_VS, ST_VS_WRITTEN, ST_VS_RESET),
+ ST_VS),
vs_compile(vs_compile),
vs_prog_data(vs_prog_data)
{
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index d16cc6ed8b7..f06da953bcf 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -70,20 +70,20 @@ gen6_gs_visitor::emit_prolog()
* flags for the next vertex come right after the data items and flags for
* the previous vertex.
*/
- this->current_annotation = "gen6 prolog";
+ bld.set_annotation("gen6 prolog");
this->vertex_output = src_reg(this,
glsl_type::uint_type,
(prog_data->vue_map.num_slots + 1) *
c->gp->program.VerticesOut);
this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
- emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
+ bld.MOV(dst_reg(this->vertex_output_offset), src_reg(0u));
/* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES),
* so initialize it once to R0.
*/
- vec4_instruction *inst = emit(MOV(dst_reg(MRF, 1),
+ vec4_instruction *inst = bld.MOV(dst_reg(MRF, 1),
retype(brw_vec8_grf(0, 0),
- BRW_REGISTER_TYPE_UD)));
+ BRW_REGISTER_TYPE_UD));
inst->force_writemask_all = true;
/* This will be used as a temporary to store writeback data of FF_SYNC
@@ -98,13 +98,13 @@ gen6_gs_visitor::emit_prolog()
* headers.
*/
this->first_vertex = src_reg(this, glsl_type::uint_type);
- emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
+ bld.MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START);
   /* The FF_SYNC message requires the number of primitives generated, so
    * keep a counter for it.
*/
this->prim_count = src_reg(this, glsl_type::uint_type);
- emit(MOV(dst_reg(this->prim_count), 0u));
+ bld.MOV(dst_reg(this->prim_count), 0u);
if (c->prog_data.gen6_xfb_enabled) {
/* Create a virtual register to hold destination indices in SOL */
@@ -115,8 +115,8 @@ gen6_gs_visitor::emit_prolog()
this->svbi = src_reg(this, glsl_type::uvec4_type);
/* Create a virtual register to hold max values of SVBI */
this->max_svbi = src_reg(this, glsl_type::uvec4_type);
- emit(MOV(dst_reg(this->max_svbi),
- src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD))));
+ bld.MOV(dst_reg(this->max_svbi),
+ src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD)));
xfb_setup();
}
@@ -142,21 +142,21 @@ gen6_gs_visitor::emit_prolog()
if (c->prog_data.include_primitive_id) {
this->primitive_id =
src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
- emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
+ bld.emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
}
}
void
gen6_gs_visitor::visit(ir_emit_vertex *)
{
- this->current_annotation = "gen6 emit vertex";
+ bld.set_annotation("gen6 emit vertex");
   /* Honor the max_vertices layout qualifier in the geometry shader by
    * ignoring any vertices coming after c->gp->program.VerticesOut.
*/
unsigned num_output_vertices = c->gp->program.VerticesOut;
- emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
- BRW_CONDITIONAL_L));
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.CMP(bld.reg_null_d(), this->vertex_count, src_reg(num_output_vertices),
+ BRW_CONDITIONAL_L);
+ bld.IF(BRW_PREDICATE_NORMAL);
{
/* Buffer all output slots for this vertex in vertex_output */
for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
@@ -183,12 +183,12 @@ gen6_gs_visitor::visit(ir_emit_vertex *)
dst_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
- vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
+ vec4_instruction *inst = bld.MOV(dst, src_reg(tmp));
inst->force_writemask_all = true;
}
- emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
+ bld.ADD(dst_reg(this->vertex_output_offset),
+ this->vertex_output_offset, 1u);
}
/* Now buffer flags for this vertex */
@@ -199,32 +199,32 @@ gen6_gs_visitor::visit(ir_emit_vertex *)
/* If we are outputting points, then every vertex has PrimStart and
* PrimEnd set.
*/
- emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
- URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
- emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+ bld.MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
+ URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
+ bld.ADD(dst_reg(this->prim_count), this->prim_count, 1u);
} else {
/* Otherwise, we can only set the PrimStart flag, which we have stored
* in the first_vertex register. We will have to wait until we execute
* EndPrimitive() or we end the thread to set the PrimEnd flag on a
* vertex.
*/
- emit(OR(dst, this->first_vertex,
- (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
- emit(MOV(dst_reg(this->first_vertex), 0u));
+ bld.OR(dst, this->first_vertex,
+ (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT));
+ bld.MOV(dst_reg(this->first_vertex), 0u);
}
- emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
+ bld.ADD(dst_reg(this->vertex_output_offset),
+ this->vertex_output_offset, 1u);
/* Update vertex count */
- emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
+ bld.ADD(dst_reg(this->vertex_count), this->vertex_count, 1u);
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
}
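
Each buffered vertex above gets a flags dword alongside its data slots: the output topology in the primitive-type field plus optional start/end marker bits. A sketch of the packing (the shift and bit arguments stand in for the URB_WRITE_* macros):

   static unsigned pack_vertex_flags(unsigned topology, bool prim_start,
                                     bool prim_end, unsigned type_shift,
                                     unsigned start_bit, unsigned end_bit)
   {
      unsigned flags = topology << type_shift;
      if (prim_start)
         flags |= start_bit;
      if (prim_end)
         flags |= end_bit;
      return flags;
   }
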
void
gen6_gs_visitor::visit(ir_end_primitive *)
{
- this->current_annotation = "gen6 end primitive";
+ bld.set_annotation("gen6 end primitive");
/* Calling EndPrimitive() is optional for point output. In this case we set
* the PrimEnd flag when we process EmitVertex().
*/
@@ -241,40 +241,40 @@ gen6_gs_visitor::visit(ir_end_primitive *)
* below).
*/
unsigned num_output_vertices = c->gp->program.VerticesOut;
- emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
- BRW_CONDITIONAL_L));
- vec4_instruction *inst = emit(CMP(dst_null_d(),
+ bld.CMP(bld.reg_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
+ BRW_CONDITIONAL_L);
+ vec4_instruction *inst = bld.CMP(bld.reg_null_d(),
this->vertex_count, 0u,
- BRW_CONDITIONAL_NEQ));
+ BRW_CONDITIONAL_NEQ);
inst->predicate = BRW_PREDICATE_NORMAL;
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.IF(BRW_PREDICATE_NORMAL);
{
/* vertex_output_offset is already pointing at the first entry of the
* next vertex. So subtract 1 to modify the flags for the previous
* vertex.
*/
src_reg offset(this, glsl_type::uint_type);
- emit(ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1)));
+ bld.ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1));
src_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
memcpy(dst.reladdr, &offset, sizeof(src_reg));
- emit(OR(dst_reg(dst), dst, URB_WRITE_PRIM_END));
- emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+ bld.OR(dst_reg(dst), dst, URB_WRITE_PRIM_END);
+ bld.ADD(dst_reg(this->prim_count), this->prim_count, 1u);
/* Set the first vertex flag to indicate that the next vertex will start
* a primitive.
*/
- emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
+ bld.MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START);
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
}
void
gen6_gs_visitor::emit_urb_write_header(int mrf)
{
- this->current_annotation = "gen6 urb header";
+ bld.set_annotation("gen6 urb header");
/* Compute offset of the flags for the current vertex in vertex_output and
* write them in dw2 of the message header.
*
@@ -284,14 +284,14 @@ gen6_gs_visitor::emit_urb_write_header(int mrf)
* slots per vertex to that offset to obtain the flags data offset.
*/
src_reg flags_offset(this, glsl_type::uint_type);
- emit(ADD(dst_reg(flags_offset),
- this->vertex_output_offset, src_reg(prog_data->vue_map.num_slots)));
+ bld.ADD(dst_reg(flags_offset),
+ this->vertex_output_offset, src_reg(prog_data->vue_map.num_slots));
src_reg flags_data(this->vertex_output);
flags_data.reladdr = ralloc(mem_ctx, src_reg);
memcpy(flags_data.reladdr, &flags_offset, sizeof(src_reg));
- emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data);
+ bld.emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data);
}
void
@@ -302,7 +302,7 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
if (!complete) {
/* If the vertex is not complete we don't have to do anything special */
- inst = emit(GS_OPCODE_URB_WRITE);
+ inst = bld.emit(GS_OPCODE_URB_WRITE);
inst->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
} else {
/* Otherwise we always request to allocate a new VUE handle. If this is
@@ -313,7 +313,7 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
       * which would require ending the program with an IF/ELSE/ENDIF block,
* something we do not want.
*/
- inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE);
+ inst = bld.emit(GS_OPCODE_URB_WRITE_ALLOCATE);
inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
inst->dst = dst_reg(MRF, base_mrf);
inst->src[0] = this->temp;
@@ -339,12 +339,12 @@ gen6_gs_visitor::emit_thread_end()
* points because in the point case we set PrimEnd on all vertices.
*/
if (c->gp->program.OutputType != GL_POINTS) {
- emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.CMP(bld.reg_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z);
+ bld.IF(BRW_PREDICATE_NORMAL);
{
visit((ir_end_primitive *) NULL);
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
}
/* Here we have to:
@@ -367,38 +367,38 @@ gen6_gs_visitor::emit_thread_end()
int max_usable_mrf = 13;
/* Issue the FF_SYNC message and obtain the initial VUE handle. */
- emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G));
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.CMP(bld.reg_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G);
+ bld.IF(BRW_PREDICATE_NORMAL);
{
- this->current_annotation = "gen6 thread end: ff_sync";
+ bld.set_annotation("gen6 thread end: ff_sync");
vec4_instruction *inst;
if (c->prog_data.gen6_xfb_enabled) {
src_reg sol_temp(this, glsl_type::uvec4_type);
- emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
+ bld.emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
dst_reg(this->svbi),
this->vertex_count,
this->prim_count,
sol_temp);
- inst = emit(GS_OPCODE_FF_SYNC,
+ inst = bld.emit(GS_OPCODE_FF_SYNC,
dst_reg(this->temp), this->prim_count, this->svbi);
} else {
- inst = emit(GS_OPCODE_FF_SYNC,
+ inst = bld.emit(GS_OPCODE_FF_SYNC,
dst_reg(this->temp), this->prim_count, brw_imm_ud(0u));
}
inst->base_mrf = base_mrf;
/* Loop over all buffered vertices and emit URB write messages */
- this->current_annotation = "gen6 thread end: urb writes init";
+ bld.set_annotation("gen6 thread end: urb writes init");
src_reg vertex(this, glsl_type::uint_type);
- emit(MOV(dst_reg(vertex), 0u));
- emit(MOV(dst_reg(this->vertex_output_offset), 0u));
+ bld.MOV(dst_reg(vertex), 0u);
+ bld.MOV(dst_reg(this->vertex_output_offset), 0u);
- this->current_annotation = "gen6 thread end: urb writes";
- emit(BRW_OPCODE_DO);
+ bld.set_annotation("gen6 thread end: urb writes");
+ bld.emit(BRW_OPCODE_DO);
{
- emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
- inst = emit(BRW_OPCODE_BREAK);
+ bld.CMP(bld.reg_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE);
+ inst = bld.emit(BRW_OPCODE_BREAK);
inst->predicate = BRW_PREDICATE_NORMAL;
/* First we prepare the message header */
@@ -417,7 +417,7 @@ gen6_gs_visitor::emit_thread_end()
for (; slot < prog_data->vue_map.num_slots; ++slot) {
int varying = prog_data->vue_map.slot_to_varying[slot];
- current_annotation = output_reg_annotation[varying];
+ bld.set_annotation(output_reg_annotation[varying]);
/* Compute offset of this slot for the current vertex
* in vertex_output
@@ -431,12 +431,12 @@ gen6_gs_visitor::emit_thread_end()
dst_reg reg = dst_reg(MRF, mrf);
reg.type = output_reg[varying].type;
data.type = reg.type;
- vec4_instruction *inst = emit(MOV(reg, data));
+ vec4_instruction *inst = bld.MOV(reg, data);
inst->force_writemask_all = true;
mrf++;
- emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
+ bld.ADD(dst_reg(this->vertex_output_offset),
+ this->vertex_output_offset, 1u);
/* If this was max_usable_mrf, we can't fit anything more into
* this URB WRITE.
@@ -455,17 +455,17 @@ gen6_gs_visitor::emit_thread_end()
* to the first data item of the next vertex, so that we can start
* writing the next vertex.
*/
- emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
+ bld.ADD(dst_reg(this->vertex_output_offset),
+ this->vertex_output_offset, 1u);
- emit(ADD(dst_reg(vertex), vertex, 1u));
+ bld.ADD(dst_reg(vertex), vertex, 1u);
}
- emit(BRW_OPCODE_WHILE);
+ bld.emit(BRW_OPCODE_WHILE);
if (c->prog_data.gen6_xfb_enabled)
xfb_write();
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
/* Finally, emit EOT message.
*
@@ -482,17 +482,17 @@ gen6_gs_visitor::emit_thread_end()
* which works for both cases by setting the COMPLETE and UNUSED flags in
* the EOT message.
*/
- this->current_annotation = "gen6 thread end: EOT";
+ bld.set_annotation("gen6 thread end: EOT");
if (c->prog_data.gen6_xfb_enabled) {
/* When emitting EOT, set SONumPrimsWritten Increment Value. */
src_reg data(this, glsl_type::uint_type);
- emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu)));
- emit(SHL(dst_reg(data), data, brw_imm_ud(16u)));
- emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
+ bld.AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu));
+ bld.SHL(dst_reg(data), data, brw_imm_ud(16u));
+ bld.emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
}
- vec4_instruction *inst = emit(GS_OPCODE_THREAD_END);
+ vec4_instruction *inst = bld.emit(GS_OPCODE_THREAD_END);
inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
inst->base_mrf = base_mrf;
inst->mlen = 1;
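
The AND/SHL pair just above masks the primitive count to 16 bits and moves it into the upper half of the dword written to DW2 of the message header. Scalar equivalent (hypothetical helper):

   /* SONumPrimsWritten increment: low 16 bits of the primitive
    * count, placed in the high half of the header dword. */
   static unsigned pack_so_prims_written(unsigned prim_count)
   {
      return (prim_count & 0xffffu) << 16;
   }
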
@@ -610,10 +610,10 @@ gen6_gs_visitor::xfb_write()
unreachable("Unexpected primitive type in Gen6 SOL program.");
}
- this->current_annotation = "gen6 thread end: svb writes init";
+ bld.set_annotation("gen6 thread end: svb writes init");
- emit(MOV(dst_reg(this->vertex_output_offset), 0u));
- emit(MOV(dst_reg(this->sol_prim_written), 0u));
+ bld.MOV(dst_reg(this->vertex_output_offset), 0u);
+ bld.MOV(dst_reg(this->sol_prim_written), 0u);
/* Check that at least one primitive can be written
*
@@ -624,37 +624,37 @@ gen6_gs_visitor::xfb_write()
* transform feedback is in interleaved or separate attribs mode.
*/
src_reg sol_temp(this, glsl_type::uvec4_type);
- emit(ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts)));
+ bld.ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts));
   /* Compare the calculated SVBI number with the maximum value, which for
    * gen6 is in R1.4 (previously saved in this->max_svbi).
*/
- emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.CMP(bld.reg_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE);
+ bld.IF(BRW_PREDICATE_NORMAL);
{
- struct src_reg destination_indices_uw =
+ src_reg destination_indices_uw =
retype(destination_indices, BRW_REGISTER_TYPE_UW);
- vec4_instruction *inst = emit(MOV(dst_reg(destination_indices_uw),
- brw_imm_v(0x00020100))); /* (0, 1, 2) */
+ vec4_instruction *inst = bld.MOV(dst_reg(destination_indices_uw),
+ brw_imm_v(0x00020100)); /* (0, 1, 2) */
inst->force_writemask_all = true;
- emit(ADD(dst_reg(this->destination_indices),
+ bld.ADD(dst_reg(this->destination_indices),
this->destination_indices,
- this->svbi));
+ this->svbi);
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
/* Write transform feedback data for all processed vertices. */
for (int i = 0; i < c->gp->program.VerticesOut; i++) {
- emit(MOV(dst_reg(sol_temp), i));
- emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
- BRW_CONDITIONAL_L));
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.MOV(dst_reg(sol_temp), i);
+ bld.CMP(bld.reg_null_d(), sol_temp, this->vertex_count,
+ BRW_CONDITIONAL_L);
+ bld.IF(BRW_PREDICATE_NORMAL);
{
xfb_program(i, num_verts);
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
}
}
@@ -670,16 +670,16 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
/* Check for buffer overflow: we need room to write the complete primitive
    * (all vertices). Otherwise, avoid writing any vertices for it.
*/
- emit(ADD(dst_reg(sol_temp), this->sol_prim_written, 1u));
- emit(MUL(dst_reg(sol_temp), sol_temp, brw_imm_ud(num_verts)));
- emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi));
- emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.ADD(dst_reg(sol_temp), this->sol_prim_written, 1u);
+ bld.MUL(dst_reg(sol_temp), sol_temp, brw_imm_ud(num_verts));
+ bld.ADD(dst_reg(sol_temp), sol_temp, this->svbi);
+ bld.CMP(bld.reg_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE);
+ bld.IF(BRW_PREDICATE_NORMAL);
{
/* Avoid overwriting MRF 1 as it is used as URB write message header */
dst_reg mrf_reg(MRF, 2);
- this->current_annotation = "gen6: emit SOL vertex data";
+ bld.set_annotation("gen6: emit SOL vertex data");
/* For each vertex, generate code to output each varying using the
* appropriate binding table entry.
*/
@@ -688,7 +688,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
prog_data->transform_feedback_bindings[binding];
/* Set up the correct destination index for this vertex */
- vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
+ vec4_instruction *inst = bld.emit(GS_OPCODE_SVB_SET_DST_INDEX,
mrf_reg,
this->destination_indices);
inst->sol_vertex = vertex % num_verts;
@@ -705,11 +705,11 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
/* Compute offset of this varying for the current vertex
* in vertex_output
*/
- this->current_annotation = output_reg_annotation[varying];
+ bld.set_annotation(output_reg_annotation[varying]);
src_reg data(this->vertex_output);
data.reladdr = ralloc(mem_ctx, src_reg);
int offset = get_vertex_output_offset_for_varying(vertex, varying);
- emit(MOV(dst_reg(this->vertex_output_offset), offset));
+ bld.MOV(dst_reg(this->vertex_output_offset), offset);
memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
data.type = output_reg[varying].type;
@@ -726,7 +726,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
data.swizzle = prog_data->transform_feedback_swizzles[binding];
/* Write data */
- inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
+ inst = bld.emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
inst->sol_binding = binding;
inst->sol_final_write = final_write;
@@ -734,17 +734,17 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
               /* This is the last vertex of the primitive, so increment the
                * SO primitive counter and the destination indices.
*/
- emit(ADD(dst_reg(this->destination_indices),
+ bld.ADD(dst_reg(this->destination_indices),
this->destination_indices,
- brw_imm_ud(num_verts)));
- emit(ADD(dst_reg(this->sol_prim_written),
- this->sol_prim_written, 1u));
+ brw_imm_ud(num_verts));
+ bld.ADD(dst_reg(this->sol_prim_written),
+ this->sol_prim_written, 1u);
}
}
- this->current_annotation = NULL;
+ bld.set_annotation(NULL);
}
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
}
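
xfb_write() and xfb_program() above both guard SVB writes against overflow: once before any vertex is written, and once per primitive while writing. A scalar model of the two comparisons (helper names are hypothetical):

   /* Room for the first primitive: svbi + num_verts <= max_svbi. */
   static bool svb_room_for_first(unsigned svbi, unsigned num_verts,
                                  unsigned max_svbi)
   {
      return svbi + num_verts <= max_svbi;
   }

   /* Room for one more primitive while writing:
    * (prims_written + 1) * num_verts + svbi <= max_svbi. */
   static bool svb_room_for_next(unsigned svbi, unsigned prims_written,
                                 unsigned num_verts, unsigned max_svbi)
   {
      return (prims_written + 1) * num_verts + svbi <= max_svbi;
   }
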
int