summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2012-12-06 22:36:50 -0800
committerKenneth Graunke <kenneth@whitecape.org>2014-01-18 21:55:54 -0800
commitf8035ba0363a8726fcd95ae761fbd31b3d5b41d4 (patch)
tree6b9c6d0927514a8d0fc0a9a1a072e18f66cb8012
parent8ea4b16eea05cddc0c0057d8ff7e84f97a9361f8 (diff)
i965: Add a new infrastructure for generating Broadwell shader assembly.
This replaces the brw_eu_emit.c layer for Broadwell. It will be used by both the vector and scalar shader backends. v2: Port to use the C-based instruction representation. v3: Fix destination register type for CMP. v4: Pass brw to gen8_instruction functions (required by rebase). v5: Remove bogus assertion on math instructions (caught by Piglit). v6: Remove more restrictions on math instructions (caught by Eric). Make ADDC and SUBB helpers set accumulator writes, like MAC and MACH (caught by Matt). v7: Don't implicitly force ALU3 operations to SIMD8 (we've been able to do SIMD16 versions since Haswell, but didn't when I originally wrote this code). Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Eric Anholt <eric@anholt.net> Reviewed-by: Matt Turner <mattst88@gmail.com>
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/gen8_generator.cpp643
-rw-r--r--src/mesa/drivers/dri/i965/gen8_generator.h198
3 files changed, 842 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 4c629ccda32..ade40eded5a 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -140,5 +140,6 @@ i965_FILES = \
gen7_wm_state.c \
gen7_wm_surface_state.c \
gen8_disasm.c \
+ gen8_generator.cpp \
gen8_instruction.c \
$()
diff --git a/src/mesa/drivers/dri/i965/gen8_generator.cpp b/src/mesa/drivers/dri/i965/gen8_generator.cpp
new file mode 100644
index 00000000000..ee5f792187f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen8_generator.cpp
@@ -0,0 +1,643 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file gen8_generator.cpp
+ *
+ * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
+ */
+
+extern "C" {
+#include "main/compiler.h"
+#include "main/macros.h"
+#include "brw_context.h"
+} /* extern "C" */
+
+#include "glsl/ralloc.h"
+#include "brw_eu.h"
+#include "brw_reg.h"
+#include "gen8_generator.h"
+
+/**
+ * Create a generator with an empty instruction store and empty control
+ * flow stacks.
+ *
+ * \param brw          context; also the source of the gl_context pointer.
+ * \param shader_prog  stored as-is.
+ * \param prog         stored as-is.
+ * \param mem_ctx      context passed to every rzalloc_array()/reralloc()
+ *                     call made by the generator.
+ */
+gen8_generator::gen8_generator(struct brw_context *brw,
+                               struct gl_shader_program *shader_prog,
+                               struct gl_program *prog,
+                               void *mem_ctx)
+   : shader_prog(shader_prog), shader(NULL), prog(prog), brw(brw),
+     intel(NULL), mem_ctx(mem_ctx)
+{
+   ctx = &brw->ctx;
+
+   /* Everything defaults to zero except the mask control. */
+   memset(&default_state, 0, sizeof(default_state));
+   default_state.mask_control = BRW_MASK_ENABLE;
+
+   /* Initial capacity; next_inst() doubles it on demand. */
+   store_size = 1024;
+   store = rzalloc_array(mem_ctx, gen8_instruction, store_size);
+   nr_inst = 0;
+   next_inst_offset = 0;
+
+   /* Set up the control flow stacks. */
+   if_stack_depth = 0;
+   if_stack_array_size = 16;
+   if_stack = rzalloc_array(mem_ctx, int, if_stack_array_size);
+
+   loop_stack_depth = 0;
+   loop_stack_array_size = 16;
+   loop_stack = rzalloc_array(mem_ctx, int, loop_stack_array_size);
+
+   /* Not used by the generator itself, but must not be left indeterminate
+    * for anyone who reads it.
+    */
+   if_depth_in_loop = 0;
+}
+
+gen8_generator::~gen8_generator()
+{
+ /* Intentionally empty.  The store and both control flow stacks were
+  * allocated against mem_ctx.  NOTE(review): presumably they are released
+  * together with that ralloc context -- confirm against the caller.
+  */
+}
+
+/**
+ * Append a zeroed instruction to the store and stamp it with \p opcode and
+ * the current default state.
+ *
+ * The store doubles in size when full, so a call may move it: pointers
+ * obtained from earlier calls must not be kept across this one.
+ *
+ * \return pointer to the new instruction inside the store.
+ */
+gen8_instruction *
+gen8_generator::next_inst(unsigned opcode)
+{
+   if (unsigned(store_size) < nr_inst + 1) {
+      store_size *= 2;
+      store = reralloc(mem_ctx, store, gen8_instruction, store_size);
+      assert(store);
+   }
+
+   gen8_instruction *insn = store + nr_inst;
+   nr_inst++;
+   next_inst_offset += 16;
+
+   memset(insn, 0, sizeof(*insn));
+
+   /* Opcode first, then everything inherited from default_state. */
+   gen8_set_opcode(insn, opcode);
+   gen8_set_exec_size(insn, default_state.exec_size);
+   gen8_set_access_mode(insn, default_state.access_mode);
+   gen8_set_mask_control(insn, default_state.mask_control);
+   gen8_set_cond_modifier(insn, default_state.conditional_mod);
+   gen8_set_pred_control(insn, default_state.predicate);
+   gen8_set_pred_inv(insn, default_state.predicate_inverse);
+   gen8_set_saturate(insn, default_state.saturate);
+   gen8_set_flag_subreg_nr(insn, default_state.flag_subreg_nr);
+
+   return insn;
+}
+
+/* ALU1(OP): define gen8_generator::OP(dst, src), which emits a BRW_OPCODE_OP
+ * instruction carrying the current default state with one source operand.
+ */
+#define ALU1(OP) \
+gen8_instruction * \
+gen8_generator::OP(struct brw_reg dst, struct brw_reg src) \
+{ \
+ gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
+ gen8_set_dst(brw, inst, dst); \
+ gen8_set_src0(brw, inst, src); \
+ return inst; \
+}
+
+/* ALU2(OP): same as ALU1, but with two source operands. */
+#define ALU2(OP) \
+gen8_instruction * \
+gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
+{ \
+ gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
+ gen8_set_dst(brw, inst, dst); \
+ gen8_set_src0(brw, inst, s0); \
+ gen8_set_src1(brw, inst, s1); \
+ return inst; \
+}
+
+/* ALU2_ACCUMULATE(OP): same as ALU2, but additionally turns on accumulator
+ * write control for the emitted instruction.
+ */
+#define ALU2_ACCUMULATE(OP) \
+gen8_instruction * \
+gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
+{ \
+ gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
+ gen8_set_dst(brw, inst, dst); \
+ gen8_set_src0(brw, inst, s0); \
+ gen8_set_src1(brw, inst, s1); \
+ gen8_set_acc_wr_control(inst, true); \
+ return inst; \
+}
+
+/* ALU3(OP): three-source emitter; all of the encoding work is done by
+ * alu3().
+ */
+#define ALU3(OP) \
+gen8_instruction * \
+gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
+ struct brw_reg s1, struct brw_reg s2) \
+{ \
+ return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
+}
+
+/* ALU3F(OP): same as ALU3, but asserts that the destination and all three
+ * sources are of type float.
+ */
+#define ALU3F(OP) \
+gen8_instruction * \
+gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
+ struct brw_reg s1, struct brw_reg s2) \
+{ \
+ assert(dst.type == BRW_REGISTER_TYPE_F); \
+ assert(s0.type == BRW_REGISTER_TYPE_F); \
+ assert(s1.type == BRW_REGISTER_TYPE_F); \
+ assert(s2.type == BRW_REGISTER_TYPE_F); \
+ return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
+}
+
+/* Instantiate one emitter per opcode using the macros above.  ADDC, SUBB,
+ * MAC and MACH use the accumulator-writing variant; LRP and MAD use the
+ * float-only three-source variant.
+ */
+ALU2(ADD)
+ALU2(AND)
+ALU2(ASR)
+ALU3(BFE)
+ALU2(BFI1)
+ALU3(BFI2)
+ALU1(F32TO16)
+ALU1(F16TO32)
+ALU1(BFREV)
+ALU1(CBIT)
+ALU2_ACCUMULATE(ADDC)
+ALU2_ACCUMULATE(SUBB)
+ALU2(DP2)
+ALU2(DP3)
+ALU2(DP4)
+ALU2(DPH)
+ALU1(FBH)
+ALU1(FBL)
+ALU1(FRC)
+ALU2(LINE)
+ALU3F(LRP)
+ALU3F(MAD)
+ALU2(MUL)
+ALU1(MOV)
+ALU1(NOT)
+ALU2(OR)
+ALU2(PLN)
+ALU1(RNDD)
+ALU1(RNDE)
+ALU1(RNDZ)
+ALU2_ACCUMULATE(MAC)
+ALU2_ACCUMULATE(MACH)
+ALU2(SEL)
+ALU2(SHL)
+ALU2(SHR)
+ALU2(XOR)
+
+/**
+ * Emit a CMP of \p src0 against \p src1 using condition \p conditional.
+ *
+ * The type of \p dst is ignored and replaced by the type of \p src0.
+ */
+gen8_instruction *
+gen8_generator::CMP(struct brw_reg dst, unsigned conditional,
+                    struct brw_reg src0, struct brw_reg src1)
+{
+   /* CMP appears to behave erratically on floating point sources unless
+    * the destination is float as well.  Forcing the destination type to
+    * follow src0 makes it work in every case.
+    */
+   dst.type = src0.type;
+
+   gen8_instruction *cmp = next_inst(BRW_OPCODE_CMP);
+   gen8_set_cond_modifier(cmp, conditional);
+   gen8_set_dst(brw, cmp, dst);
+   gen8_set_src0(brw, cmp, src0);
+   gen8_set_src1(brw, cmp, src1);
+
+   return cmp;
+}
+
+/**
+ * Compute the sub-register number to encode for a three-source operand.
+ *
+ * The base value is reg.subnr / 4.  For operands with a vertical stride of
+ * zero, the swizzle must select a single component, and that component's
+ * index is added on top.
+ */
+static int
+get_3src_subreg_nr(struct brw_reg reg)
+{
+   if (reg.vstride != BRW_VERTICAL_STRIDE_0)
+      return reg.subnr / 4;
+
+   assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
+   return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
+}
+
+/**
+ * Emit a three-source instruction.
+ *
+ * The destination and all three sources must be directly addressed general
+ * registers with a register number below 128, and the default access mode
+ * must be Align16; all of this is checked with assertions only.  The
+ * destination must be of type F, D or UD.
+ *
+ * \return the newly emitted instruction.
+ */
+gen8_instruction *
+gen8_generator::alu3(unsigned opcode,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1,
+ struct brw_reg src2)
+{
+ /* MRFs haven't existed since Gen7, so a destination in the message
+  * register file is redirected to the general register file, offset by
+  * GEN7_MRF_HACK_START.
+  */
+ if (dst.file == BRW_MESSAGE_REGISTER_FILE) {
+ dst.file = BRW_GENERAL_REGISTER_FILE;
+ dst.nr += GEN7_MRF_HACK_START;
+ }
+
+ gen8_instruction *inst = next_inst(opcode);
+ assert(gen8_access_mode(inst) == BRW_ALIGN_16);
+
+ assert(dst.file == BRW_GENERAL_REGISTER_FILE);
+ assert(dst.nr < 128);
+ assert(dst.address_mode == BRW_ADDRESS_DIRECT);
+ assert(dst.type == BRW_REGISTER_TYPE_F ||
+ dst.type == BRW_REGISTER_TYPE_D ||
+ dst.type == BRW_REGISTER_TYPE_UD);
+ gen8_set_dst_3src_reg_nr(inst, dst.nr);
+ gen8_set_dst_3src_subreg_nr(inst, dst.subnr / 16);
+ gen8_set_dst_3src_writemask(inst, dst.dw1.bits.writemask);
+
+ assert(src0.file == BRW_GENERAL_REGISTER_FILE);
+ assert(src0.address_mode == BRW_ADDRESS_DIRECT);
+ assert(src0.nr < 128);
+ gen8_set_src0_3src_swizzle(inst, src0.dw1.bits.swizzle);
+ gen8_set_src0_3src_subreg_nr(inst, get_3src_subreg_nr(src0));
+ gen8_set_src0_3src_rep_ctrl(inst, src0.vstride == BRW_VERTICAL_STRIDE_0);
+ gen8_set_src0_3src_reg_nr(inst, src0.nr);
+ gen8_set_src0_3src_abs(inst, src0.abs);
+ gen8_set_src0_3src_negate(inst, src0.negate);
+
+ assert(src1.file == BRW_GENERAL_REGISTER_FILE);
+ assert(src1.address_mode == BRW_ADDRESS_DIRECT);
+ assert(src1.nr < 128);
+ gen8_set_src1_3src_swizzle(inst, src1.dw1.bits.swizzle);
+ gen8_set_src1_3src_subreg_nr(inst, get_3src_subreg_nr(src1));
+ gen8_set_src1_3src_rep_ctrl(inst, src1.vstride == BRW_VERTICAL_STRIDE_0);
+ gen8_set_src1_3src_reg_nr(inst, src1.nr);
+ gen8_set_src1_3src_abs(inst, src1.abs);
+ gen8_set_src1_3src_negate(inst, src1.negate);
+
+ assert(src2.file == BRW_GENERAL_REGISTER_FILE);
+ assert(src2.address_mode == BRW_ADDRESS_DIRECT);
+ assert(src2.nr < 128);
+ gen8_set_src2_3src_swizzle(inst, src2.dw1.bits.swizzle);
+ gen8_set_src2_3src_subreg_nr(inst, get_3src_subreg_nr(src2));
+ gen8_set_src2_3src_rep_ctrl(inst, src2.vstride == BRW_VERTICAL_STRIDE_0);
+ gen8_set_src2_3src_reg_nr(inst, src2.nr);
+ gen8_set_src2_3src_abs(inst, src2.abs);
+ gen8_set_src2_3src_negate(inst, src2.negate);
+
+ /* Set both the source and destination types based on dst.type, ignoring
+ * the source register types. The MAD and LRP emitters both ensure that
+ * all register types are float. The BFE and BFI2 emitters, however, may
+ * send us mixed D and UD source types and want us to ignore that.
+ */
+ switch (dst.type) {
+ case BRW_REGISTER_TYPE_F:
+ gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_F);
+ gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_F);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_D);
+ gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_D);
+ break;
+ case BRW_REGISTER_TYPE_UD:
+ gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_UD);
+ gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_UD);
+ break;
+ }
+
+ return inst;
+}
+
+/**
+ * Shared worker for the MATH() emitters: emits a MATH instruction with the
+ * function selector, destination and first source filled in.
+ *
+ * \p dst and \p src0 must have the same horizontal stride.
+ */
+gen8_instruction *
+gen8_generator::math(unsigned math_function,
+                     struct brw_reg dst,
+                     struct brw_reg src0)
+{
+   gen8_instruction *math_inst = next_inst(BRW_OPCODE_MATH);
+
+   assert(src0.hstride == dst.hstride);
+
+   gen8_set_math_function(math_inst, math_function);
+   gen8_set_dst(brw, math_inst, dst);
+   gen8_set_src0(brw, math_inst, src0);
+
+   return math_inst;
+}
+
+/**
+ * Emit a single-source MATH instruction.  The source must be float.
+ */
+gen8_instruction *
+gen8_generator::MATH(unsigned math_function,
+                     struct brw_reg dst,
+                     struct brw_reg src0)
+{
+   assert(src0.type == BRW_REGISTER_TYPE_F);
+   return math(math_function, dst, src0);
+}
+
+/**
+ * Emit a two-source MATH instruction.
+ *
+ * For the integer division functions neither source may be float; for every
+ * other function the first source must be float.
+ */
+gen8_instruction *
+gen8_generator::MATH(unsigned math_function,
+                     struct brw_reg dst,
+                     struct brw_reg src0,
+                     struct brw_reg src1)
+{
+   const bool is_int_div =
+      math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
+      math_function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
+      math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
+
+   if (!is_int_div) {
+      assert(src0.type == BRW_REGISTER_TYPE_F);
+   } else {
+      assert(src0.type != BRW_REGISTER_TYPE_F);
+      assert(src1.type != BRW_REGISTER_TYPE_F);
+   }
+
+   /* The shared worker handles dst and src0; only src1 is left to add. */
+   gen8_instruction *math_inst = math(math_function, dst, src0);
+   gen8_set_src1(brw, math_inst, src1);
+
+   return math_inst;
+}
+
+/**
+ * Emit a MOV that treats both operands as UD regardless of their declared
+ * types, with mask control disabled.
+ */
+gen8_instruction *
+gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0)
+{
+   const struct brw_reg raw_dst = retype(dst, BRW_REGISTER_TYPE_UD);
+   const struct brw_reg raw_src = retype(src0, BRW_REGISTER_TYPE_UD);
+
+   gen8_instruction *mov = next_inst(BRW_OPCODE_MOV);
+   gen8_set_dst(brw, mov, raw_dst);
+   gen8_set_src0(brw, mov, raw_src);
+   gen8_set_mask_control(mov, BRW_MASK_DISABLE);
+
+   return mov;
+}
+
+
+/** Emit a NOP; no operands are set beyond what next_inst() fills in. */
+gen8_instruction *
+gen8_generator::NOP()
+{
+   gen8_instruction *nop = next_inst(BRW_OPCODE_NOP);
+   return nop;
+}
+
+/**
+ * Record \p inst (an IF or ELSE) on the if-stack.
+ *
+ * The instruction is stored as an index into the store rather than as a
+ * pointer.  The stack is grown right after the push, so there is always a
+ * free slot for the next one.
+ */
+void
+gen8_generator::push_if_stack(gen8_instruction *inst)
+{
+   const int slot = if_stack_depth++;
+   if_stack[slot] = inst - store;
+
+   if (if_stack_depth >= if_stack_array_size) {
+      if_stack_array_size *= 2;
+      if_stack = reralloc(mem_ctx, if_stack, int, if_stack_array_size);
+   }
+}
+
+/**
+ * Pop the most recently pushed IF/ELSE off the if-stack.
+ *
+ * \return pointer to that instruction inside the store.  The pointer is only
+ *         valid until the next call to next_inst(), which may move the
+ *         store.
+ */
+gen8_instruction *
+gen8_generator::pop_if_stack()
+{
+   /* Popping an empty stack (ENDIF without IF) would index if_stack[-1]. */
+   assert(if_stack_depth > 0);
+
+   --if_stack_depth;
+   return &store[if_stack[if_stack_depth]];
+}
+
+/**
+ * Fill in the jump offsets (JIP and UIP) of a finished IF [ELSE] ENDIF group.
+ *
+ * \param if_inst     the IF opening the group; must not be NULL.
+ * \param else_inst   the matching ELSE, or NULL if the group has none.
+ * \param endif_inst  the ENDIF closing the group; must not be NULL.
+ *
+ * Offsets are expressed in bytes, hence the factor of 16 (128 bits per
+ * instruction).  The ELSE and ENDIF also take over the IF's execution size.
+ */
+void
+gen8_generator::patch_IF_ELSE(gen8_instruction *if_inst,
+                              gen8_instruction *else_inst,
+                              gen8_instruction *endif_inst)
+{
+   assert(if_inst != NULL && gen8_opcode(if_inst) == BRW_OPCODE_IF);
+   assert(else_inst == NULL || gen8_opcode(else_inst) == BRW_OPCODE_ELSE);
+   assert(endif_inst != NULL && gen8_opcode(endif_inst) == BRW_OPCODE_ENDIF);
+
+   const unsigned if_exec_size = gen8_exec_size(if_inst);
+   const int if_to_endif = 16 * (endif_inst - if_inst);
+
+   gen8_set_exec_size(endif_inst, if_exec_size);
+
+   if (else_inst != NULL) {
+      gen8_set_exec_size(else_inst, if_exec_size);
+
+      /* IF: JIP lands on the instruction right after the ELSE, while UIP
+       * goes all the way to the ENDIF.
+       */
+      gen8_set_jip(if_inst, 16 * (else_inst - if_inst + 1));
+      gen8_set_uip(if_inst, if_to_endif);
+
+      /* ELSE: branch_ctrl is not set, so JIP and UIP both name the ENDIF. */
+      const int else_to_endif = 16 * (endif_inst - else_inst);
+      gen8_set_jip(else_inst, else_to_endif);
+      gen8_set_uip(else_inst, else_to_endif);
+   } else {
+      /* No ELSE: the IF jumps straight to the ENDIF either way. */
+      gen8_set_jip(if_inst, if_to_endif);
+      gen8_set_uip(if_inst, if_to_endif);
+   }
+
+   /* The ENDIF simply continues with the following instruction. */
+   gen8_set_jip(endif_inst, 16);
+}
+
+/**
+ * Emit an IF with predicate control \p predicate and push it on the
+ * if-stack.  Its jump offsets are filled in later by the matching ENDIF().
+ */
+gen8_instruction *
+gen8_generator::IF(unsigned predicate)
+{
+   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
+
+   gen8_instruction *if_inst = next_inst(BRW_OPCODE_IF);
+   gen8_set_dst(brw, if_inst, vec1(null_d));
+
+   /* Keep this after gen8_set_dst().  NOTE(review): presumably setting the
+    * 1-wide null destination can alter the execution size, which is why the
+    * default is applied again here -- confirm in gen8_instruction.c.
+    */
+   gen8_set_exec_size(if_inst, default_state.exec_size);
+   gen8_set_pred_control(if_inst, predicate);
+   gen8_set_mask_control(if_inst, BRW_MASK_ENABLE);
+
+   push_if_stack(if_inst);
+   return if_inst;
+}
+
+/**
+ * Emit an ELSE and push it on the if-stack, on top of its IF.  Its jump
+ * offsets are filled in later by the matching ENDIF().
+ */
+gen8_instruction *
+gen8_generator::ELSE()
+{
+   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
+
+   gen8_instruction *else_inst = next_inst(BRW_OPCODE_ELSE);
+   gen8_set_dst(brw, else_inst, null_d);
+   gen8_set_mask_control(else_inst, BRW_MASK_ENABLE);
+
+   push_if_stack(else_inst);
+   return else_inst;
+}
+
+/**
+ * Emit an ENDIF, pop the matching IF (and ELSE, if any) off the if-stack and
+ * patch the jump offsets of the whole group.
+ *
+ * \return the ENDIF instruction.
+ */
+gen8_instruction *
+gen8_generator::ENDIF()
+{
+   /* Emit the ENDIF before looking up the IF/ELSE.  next_inst() may
+    * reallocate the store, which would leave any instruction pointer
+    * obtained beforehand dangling.
+    */
+   gen8_instruction *endif_inst = next_inst(BRW_OPCODE_ENDIF);
+   gen8_set_mask_control(endif_inst, BRW_MASK_ENABLE);
+
+   gen8_instruction *if_inst = NULL;
+   gen8_instruction *else_inst = NULL;
+
+   /* The top of the stack is either the ELSE (with its IF right below) or
+    * the IF itself.
+    */
+   gen8_instruction *tmp = pop_if_stack();
+   if (gen8_opcode(tmp) == BRW_OPCODE_ELSE) {
+      else_inst = tmp;
+      tmp = pop_if_stack();
+   }
+   assert(gen8_opcode(tmp) == BRW_OPCODE_IF);
+   if_inst = tmp;
+
+   patch_IF_ELSE(if_inst, else_inst, endif_inst);
+
+   return endif_inst;
+}
+
+/** Byte offset of the instruction that follows the one at \p ip. */
+unsigned
+gen8_generator::next_ip(unsigned ip) const
+{
+   /* Each instruction in the store takes up 16 bytes. */
+   const unsigned inst_bytes = 16;
+   return ip + inst_bytes;
+}
+
+/**
+ * Starting after byte offset \p start, look for the first ENDIF, ELSE, WHILE
+ * or HALT.
+ *
+ * \return the byte offset of that instruction, or 0 if there is none before
+ *         the end of the program.
+ */
+unsigned
+gen8_generator::find_next_block_end(unsigned start) const
+{
+   unsigned ip = next_ip(start);
+
+   while (ip < next_inst_offset) {
+      gen8_instruction *candidate = store + ip / 16;
+      const unsigned opcode = gen8_opcode(candidate);
+
+      if (opcode == BRW_OPCODE_ENDIF ||
+          opcode == BRW_OPCODE_ELSE ||
+          opcode == BRW_OPCODE_WHILE ||
+          opcode == BRW_OPCODE_HALT)
+         return ip;
+
+      ip = next_ip(ip);
+   }
+
+   return 0;
+}
+
+/* Gen6+ has no DO instruction, so the end of the loop enclosing `start` is
+ * identified as the first later WHILE whose jump target lies at or before
+ * `start`.
+ *
+ * Returns the byte offset of that WHILE.  Not finding one is a bug: it trips
+ * an assertion, and `start` is returned when assertions are compiled out.
+ */
+unsigned
+gen8_generator::find_loop_end(unsigned start) const
+{
+   /* The search begins after the instruction being fixed up, which may
+    * itself be a WHILE.
+    */
+   unsigned ip = next_ip(start);
+
+   while (ip < next_inst_offset) {
+      gen8_instruction *candidate = store + ip / 16;
+
+      if (gen8_opcode(candidate) == BRW_OPCODE_WHILE &&
+          ip + gen8_jip(candidate) <= start)
+         return ip;
+
+      ip = next_ip(ip);
+   }
+
+   assert(!"not reached");
+   return start;
+}
+
+/* After program generation, go back and update the UIP and JIP of
+ * BREAK, CONT, and HALT instructions to their correct locations.  The JIP
+ * of every ENDIF is refreshed as well.
+ *
+ * All offsets are in bytes, relative to the instruction being patched.
+ */
+void
+gen8_generator::patch_jump_targets()
+{
+   for (unsigned ip = 0; ip < next_inst_offset; ip = next_ip(ip)) {
+      gen8_instruction *inst = &store[ip / 16];
+
+      /* The end of the enclosing block is only looked up for the opcodes
+       * that need it; the lookup is a forward scan and would otherwise be
+       * repeated for every single instruction in the program.
+       */
+      switch (gen8_opcode(inst)) {
+      case BRW_OPCODE_BREAK:
+      case BRW_OPCODE_CONTINUE: {
+         /* JIP: end of the innermost block.  UIP: the WHILE that closes
+          * the enclosing loop.
+          */
+         const int block_end_ip = find_next_block_end(ip);
+         assert(block_end_ip != 0);
+         gen8_set_jip(inst, block_end_ip - ip);
+         gen8_set_uip(inst, find_loop_end(ip) - ip);
+         assert(gen8_uip(inst) != 0);
+         assert(gen8_jip(inst) != 0);
+         break;
+      }
+      case BRW_OPCODE_ENDIF: {
+         /* Without an enclosing block, just continue with the next
+          * instruction.
+          */
+         const int block_end_ip = find_next_block_end(ip);
+         if (block_end_ip == 0)
+            gen8_set_jip(inst, 16);
+         else
+            gen8_set_jip(inst, block_end_ip - ip);
+         break;
+      }
+      case BRW_OPCODE_HALT: {
+         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
+          *
+          *    "In case of the halt instruction not inside any conditional
+          *     code block, the value of <JIP> and <UIP> should be the
+          *     same. In case of the halt instruction inside conditional code
+          *     block, the <UIP> should be the end of the program, and the
+          *     <JIP> should be end of the most inner conditional code block."
+          *
+          * The uip will have already been set by whoever set up the
+          * instruction.
+          */
+         const int block_end_ip = find_next_block_end(ip);
+         if (block_end_ip == 0) {
+            gen8_set_jip(inst, gen8_uip(inst));
+         } else {
+            gen8_set_jip(inst, block_end_ip - ip);
+         }
+         assert(gen8_uip(inst) != 0);
+         assert(gen8_jip(inst) != 0);
+         break;
+      }
+      }
+   }
+}
+
+/**
+ * Mark the start of a loop.
+ *
+ * No instruction is emitted; the index of the next instruction to be emitted
+ * (the first one of the loop body) is pushed on the loop stack for the
+ * matching WHILE().
+ */
+void
+gen8_generator::DO()
+{
+   /* Grow as soon as the stack is full.  Slot [loop_stack_depth] is written
+    * below, so the array must hold at least loop_stack_depth + 1 entries.
+    */
+   if (loop_stack_array_size <= loop_stack_depth) {
+      loop_stack_array_size *= 2;
+      loop_stack = reralloc(mem_ctx, loop_stack, int, loop_stack_array_size);
+   }
+   loop_stack[loop_stack_depth++] = nr_inst;
+}
+
+/**
+ * Emit a BREAK.  Its jump offsets are left for patch_jump_targets() to fill
+ * in.
+ */
+gen8_instruction *
+gen8_generator::BREAK()
+{
+   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
+
+   gen8_instruction *brk = next_inst(BRW_OPCODE_BREAK);
+   gen8_set_dst(brw, brk, null_d);
+   gen8_set_src0(brw, brk, null_d);
+   gen8_set_src1(brw, brk, brw_imm_d(0));
+   /* Applied last, after the operands, exactly as for the other branches. */
+   gen8_set_exec_size(brk, default_state.exec_size);
+
+   return brk;
+}
+
+/**
+ * Emit a CONTINUE, using the IP register as both destination and first
+ * source.  Its jump offsets are left for patch_jump_targets() to fill in.
+ */
+gen8_instruction *
+gen8_generator::CONTINUE()
+{
+   const struct brw_reg ip_reg = brw_ip_reg();
+
+   gen8_instruction *cont = next_inst(BRW_OPCODE_CONTINUE);
+   gen8_set_dst(brw, cont, ip_reg);
+   gen8_set_src0(brw, cont, ip_reg);
+   gen8_set_src1(brw, cont, brw_imm_d(0));
+   gen8_set_exec_size(cont, default_state.exec_size);
+
+   return cont;
+}
+
+/**
+ * Close the innermost open loop: emit a WHILE whose JIP jumps back to the
+ * first instruction of the loop body, as recorded by the matching DO().
+ *
+ * \return the WHILE instruction.
+ */
+gen8_instruction *
+gen8_generator::WHILE()
+{
+   /* Emit the WHILE before resolving the loop start.  next_inst() may
+    * reallocate the store, which would invalidate a pointer taken earlier
+    * and make the pointer difference below meaningless.
+    */
+   gen8_instruction *while_inst = next_inst(BRW_OPCODE_WHILE);
+
+   /* A WHILE without a matching DO would index loop_stack[-1]. */
+   assert(loop_stack_depth > 0);
+   gen8_instruction *do_inst = &store[loop_stack[--loop_stack_depth]];
+
+   gen8_set_dst(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   gen8_set_src0(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   gen8_set_src1(brw, while_inst, brw_imm_ud(0));
+   /* Backwards jump, in bytes (16 per instruction). */
+   gen8_set_jip(while_inst, 16 * (do_inst - while_inst));
+   gen8_set_exec_size(while_inst, default_state.exec_size);
+
+   return while_inst;
+}
+
+/**
+ * Emit a HALT with mask control disabled.  The caller is expected to set the
+ * UIP; the JIP is filled in by patch_jump_targets().
+ */
+gen8_instruction *
+gen8_generator::HALT()
+{
+   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
+
+   gen8_instruction *halt = next_inst(BRW_OPCODE_HALT);
+   gen8_set_dst(brw, halt, null_d);
+   gen8_set_src0(brw, halt, null_d);
+   gen8_set_exec_size(halt, default_state.exec_size);
+   gen8_set_mask_control(halt, BRW_MASK_DISABLE);
+
+   return halt;
+}
+
+/**
+ * Write a disassembly of part of the program to \p out.
+ *
+ * \param out    stream that receives the listing.
+ * \param start  byte offset of the first instruction to print.
+ * \param end    byte offset just past the last instruction to print.
+ *
+ * Each line starts with the instruction's byte offset.
+ */
+void
+gen8_generator::disassemble(FILE *out, int start, int end)
+{
+   /* Flip to true to also print the four raw dwords of every instruction. */
+   bool dump_hex = false;
+
+   for (int offset = start; offset < end; offset += 16) {
+      gen8_instruction *inst = &store[offset / 16];
+      fprintf(out, "0x%08x: ", offset);
+
+      if (dump_hex) {
+         fprintf(out, "0x%08x 0x%08x 0x%08x 0x%08x ",
+                 ((uint32_t *) inst)[3],
+                 ((uint32_t *) inst)[2],
+                 ((uint32_t *) inst)[1],
+                 ((uint32_t *) inst)[0]);
+      }
+
+      gen8_disassemble(out, inst, brw->gen);
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/gen8_generator.h b/src/mesa/drivers/dri/i965/gen8_generator.h
new file mode 100644
index 00000000000..7d74267bcf1
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen8_generator.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file gen8_generator.h
+ *
+ * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
+ */
+
+#pragma once
+
+extern "C" {
+#include "main/macros.h"
+} /* extern "C" */
+
+#include "gen8_instruction.h"
+
+/**
+ * Instruction emitter for Gen8+ hardware.
+ *
+ * Emitted instructions are appended to a growable array ("store").  Most
+ * emitters return a pointer to the new instruction; since the store may be
+ * reallocated by any later emit, such a pointer should not be held across
+ * another emitter call.
+ */
+class gen8_generator {
+public:
+ gen8_generator(struct brw_context *brw,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog,
+ void *mem_ctx);
+ ~gen8_generator();
+
+ /**
+ * Instruction emitters.
+ * @{
+ */
+ #define ALU1(OP) \
+ gen8_instruction *OP(struct brw_reg dst, struct brw_reg src);
+ #define ALU2(OP) \
+ gen8_instruction *OP(struct brw_reg d, struct brw_reg, struct brw_reg);
+ #define ALU3(OP) \
+ gen8_instruction *OP(struct brw_reg d, \
+ struct brw_reg, struct brw_reg, struct brw_reg);
+ ALU2(ADD)
+ ALU2(AND)
+ ALU2(ASR)
+ ALU3(BFE)
+ ALU2(BFI1)
+ ALU3(BFI2)
+ ALU1(F32TO16)
+ ALU1(F16TO32)
+ ALU1(BFREV)
+ ALU1(CBIT)
+ ALU2(ADDC)
+ ALU2(SUBB)
+ ALU2(DP2)
+ ALU2(DP3)
+ ALU2(DP4)
+ ALU2(DPH)
+ ALU1(FBH)
+ ALU1(FBL)
+ ALU1(FRC)
+ ALU2(LINE)
+ ALU3(LRP)
+ ALU2(MAC)
+ ALU2(MACH)
+ ALU3(MAD)
+ ALU2(MUL)
+ ALU1(MOV)
+ ALU1(MOV_RAW)
+ ALU1(NOT)
+ ALU2(OR)
+ ALU2(PLN)
+ ALU1(RNDD)
+ ALU1(RNDE)
+ ALU1(RNDZ)
+ ALU2(SEL)
+ ALU2(SHL)
+ ALU2(SHR)
+ ALU2(XOR)
+ #undef ALU1
+ #undef ALU2
+ #undef ALU3
+
+ gen8_instruction *CMP(struct brw_reg dst, unsigned conditional,
+ struct brw_reg src0, struct brw_reg src1);
+ gen8_instruction *IF(unsigned predicate);
+ gen8_instruction *ELSE();
+ gen8_instruction *ENDIF();
+ /* DO() emits nothing; it only records where the loop body starts. */
+ void DO();
+ gen8_instruction *BREAK();
+ gen8_instruction *CONTINUE();
+ gen8_instruction *WHILE();
+
+ gen8_instruction *HALT();
+
+ gen8_instruction *MATH(unsigned math_function,
+ struct brw_reg dst,
+ struct brw_reg src0);
+ gen8_instruction *MATH(unsigned math_function,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ gen8_instruction *NOP();
+ /** @} */
+
+ /* Disassemble the instructions in the byte-offset range [start, end). */
+ void disassemble(FILE *out, int start, int end);
+
+protected:
+ /* Common encoder behind the three-source emitters. */
+ gen8_instruction *alu3(unsigned opcode,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1,
+ struct brw_reg src2);
+
+ /* Common worker behind both MATH() overloads. */
+ gen8_instruction *math(unsigned math_function,
+ struct brw_reg dst,
+ struct brw_reg src0);
+
+ /* Append a new instruction carrying the given opcode and default_state. */
+ gen8_instruction *next_inst(unsigned opcode);
+
+ struct gl_shader_program *shader_prog;
+ struct gl_shader *shader;
+ struct gl_program *prog;
+
+ struct brw_context *brw;
+ struct intel_context *intel;
+ struct gl_context *ctx;
+
+ /* Instruction store, its capacity and its fill level, both counted in
+  * instructions.  next_inst_offset is the fill level expressed in bytes
+  * (16 per instruction).
+  */
+ gen8_instruction *store;
+ unsigned store_size;
+ unsigned nr_inst;
+ unsigned next_inst_offset;
+
+ /**
+ * Control flow stacks:
+ *
+ * if_stack contains IF and ELSE instructions which must be patched with
+ * the final jump offsets (and popped) once the matching ENDIF is encountered.
+ *
+ * loop_stack contains, for every open DO, the position of the first
+ * instruction of the loop body; WHILE pops it to compute its jump.
+ *
+ * We actually store an array index into the store, rather than pointers
+ * to the instructions. This is necessary since we may realloc the store.
+ *
+ * @{
+ */
+ int *if_stack;
+ int if_stack_depth;
+ int if_stack_array_size;
+
+ int *loop_stack;
+ int loop_stack_depth;
+ int loop_stack_array_size;
+
+ int if_depth_in_loop;
+
+ void push_if_stack(gen8_instruction *inst);
+ gen8_instruction *pop_if_stack();
+ /** @} */
+
+ void patch_IF_ELSE(gen8_instruction *if_inst,
+ gen8_instruction *else_inst,
+ gen8_instruction *endif_inst);
+
+ /* Helpers working on byte offsets into the store. */
+ unsigned next_ip(unsigned ip) const;
+ unsigned find_next_block_end(unsigned start_ip) const;
+ unsigned find_loop_end(unsigned start) const;
+
+ void patch_jump_targets();
+
+ /**
+ * Default state for new instructions.
+ */
+ struct {
+ unsigned exec_size;
+ unsigned access_mode;
+ unsigned mask_control;
+ unsigned flag_subreg_nr;
+ unsigned conditional_mod;
+ unsigned predicate;
+ bool predicate_inverse;
+ bool saturate;
+ } default_state;
+
+ /* Context handed to every rzalloc_array()/reralloc() call. */
+ void *mem_ctx;
+};