From 8c229d306b3f312adbdfbaf79967ee43fbfc839e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 11 Aug 2014 10:07:07 -0700 Subject: i965: Delete the Gen8 code generators. We now use the brw_eu_emit.c code instead. Signed-off-by: Kenneth Graunke Reviewed-by: Chris Forbes Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/Makefile.sources | 4 - src/mesa/drivers/dri/i965/brw_fs.h | 87 -- src/mesa/drivers/dri/i965/brw_vec4.h | 67 -- src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 1301 --------------------- src/mesa/drivers/dri/i965/gen8_generator.cpp | 620 ---------- src/mesa/drivers/dri/i965/gen8_generator.h | 196 ---- src/mesa/drivers/dri/i965/gen8_instruction.c | 458 -------- src/mesa/drivers/dri/i965/gen8_instruction.h | 418 ------- src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp | 925 --------------- 9 files changed, 4076 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp delete mode 100644 src/mesa/drivers/dri/i965/gen8_generator.cpp delete mode 100644 src/mesa/drivers/dri/i965/gen8_generator.h delete mode 100644 src/mesa/drivers/dri/i965/gen8_instruction.c delete mode 100644 src/mesa/drivers/dri/i965/gen8_instruction.h delete mode 100644 src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 1e5d1c68129..ca0d7cb76f4 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -150,16 +150,12 @@ i965_FILES = \ gen8_depth_state.c \ gen8_disable.c \ gen8_draw_upload.c \ - gen8_fs_generator.cpp \ - gen8_generator.cpp \ - gen8_instruction.c \ gen8_gs_state.c \ gen8_misc_state.c \ gen8_multisample_state.c \ gen8_sf_state.c \ gen8_sol_state.c \ gen8_surface_state.c \ - gen8_vec4_generator.cpp \ gen8_viewport_state.c \ gen8_vs_state.c \ gen8_wm_depth_stencil.c \ diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index dfb13ea709d..e7a82c4efb7 
100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -48,7 +48,6 @@ extern "C" { #include "brw_shader.h" #include "intel_asm_annotation.h" } -#include "gen8_generator.h" #include "glsl/glsl_types.h" #include "glsl/ir.h" @@ -691,92 +690,6 @@ private: void *mem_ctx; }; -/** - * The fragment shader code generator. - * - * Translates FS IR to actual i965 assembly code. - */ -class gen8_fs_generator : public gen8_generator -{ -public: - gen8_fs_generator(struct brw_context *brw, - void *mem_ctx, - const struct brw_wm_prog_key *key, - struct brw_wm_prog_data *prog_data, - struct gl_shader_program *prog, - struct gl_fragment_program *fp, - bool dual_source_output); - ~gen8_fs_generator(); - - const unsigned *generate_assembly(exec_list *simd8_instructions, - exec_list *simd16_instructions, - unsigned *assembly_size); - -private: - void generate_code(exec_list *instructions); - void generate_fb_write(fs_inst *inst); - void generate_linterp(fs_inst *inst, struct brw_reg dst, - struct brw_reg *src); - void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, - struct brw_reg sampler_index); - void generate_math1(fs_inst *inst, struct brw_reg dst, struct brw_reg src); - void generate_math2(fs_inst *inst, struct brw_reg dst, - struct brw_reg src0, struct brw_reg src1); - void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); - void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src, - bool negate_value); - void generate_scratch_write(fs_inst *inst, struct brw_reg src); - void generate_scratch_read(fs_inst *inst, struct brw_reg dst); - void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst); - void generate_uniform_pull_constant_load(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); - void generate_varying_pull_constant_load(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); - void 
generate_mov_dispatch_to_flags(fs_inst *ir); - void generate_set_omask(fs_inst *ir, - struct brw_reg dst, - struct brw_reg sample_mask); - void generate_set_sample_id(fs_inst *ir, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_set_simd4x2_offset(fs_inst *ir, - struct brw_reg dst, - struct brw_reg offset); - void generate_pack_half_2x16_split(fs_inst *inst, - struct brw_reg dst, - struct brw_reg x, - struct brw_reg y); - void generate_unpack_half_2x16_split(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src); - void generate_untyped_atomic(fs_inst *inst, - struct brw_reg dst, - struct brw_reg atomic_op, - struct brw_reg surf_index); - - void generate_untyped_surface_read(fs_inst *inst, - struct brw_reg dst, - struct brw_reg surf_index); - void generate_discard_jump(fs_inst *ir); - - bool patch_discard_jumps_to_fb_writes(); - - const struct brw_wm_prog_key *const key; - struct brw_wm_prog_data *prog_data; - const struct gl_fragment_program *fp; - - unsigned dispatch_width; /** 8 or 16 */ - - bool dual_source_output; - - exec_list discard_halt_patches; -}; - bool brw_do_channel_expressions(struct exec_list *instructions); bool brw_do_vector_splitting(struct exec_list *instructions); bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 90012860498..67132c0c1c5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -40,7 +40,6 @@ extern "C" { #ifdef __cplusplus }; /* extern "C" */ -#include "gen8_generator.h" #endif #include "glsl/ir.h" @@ -702,72 +701,6 @@ private: const bool debug_flag; }; -/** - * The vertex shader code generator. - * - * Translates VS IR to actual i965 assembly code. 
- */ -class gen8_vec4_generator : public gen8_generator -{ -public: - gen8_vec4_generator(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_vec4_prog_data *prog_data, - void *mem_ctx, - bool debug_flag); - ~gen8_vec4_generator(); - - const unsigned *generate_assembly(exec_list *insts, unsigned *asm_size); - -private: - void generate_code(exec_list *instructions); - void generate_vec4_instruction(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg *src); - - void generate_tex(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg sampler_index); - - void generate_urb_write(vec4_instruction *ir, bool copy_g0); - void generate_gs_thread_end(vec4_instruction *ir); - void generate_gs_set_write_offset(struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_gs_set_vertex_count(struct brw_reg dst, - struct brw_reg src); - void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src); - void generate_gs_prepare_channel_masks(struct brw_reg dst); - void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src); - - void generate_oword_dual_block_offsets(struct brw_reg m1, - struct brw_reg index); - void generate_scratch_write(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg index); - void generate_scratch_read(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index); - void generate_pull_constant_load(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); - void generate_untyped_atomic(vec4_instruction *ir, - struct brw_reg dst, - struct brw_reg atomic_op, - struct brw_reg surf_index); - void generate_untyped_surface_read(vec4_instruction *ir, - struct brw_reg dst, - struct brw_reg surf_index); - - struct brw_vec4_prog_data *prog_data; - - const bool debug_flag; -}; - - } /* namespace brw */ #endif /* __cplusplus */ diff --git 
a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp deleted file mode 100644 index b7e7cf91b3f..00000000000 --- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp +++ /dev/null @@ -1,1301 +0,0 @@ -/* - * Copyright © 2010, 2011, 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** @file gen8_fs_generate.cpp - * - * Code generation for Gen8+ hardware. - */ - -extern "C" { -#include "main/macros.h" -#include "brw_context.h" -} /* extern "C" */ - -#include "brw_fs.h" -#include "brw_cfg.h" -#include "glsl/ir_print_visitor.h" - -gen8_fs_generator::gen8_fs_generator(struct brw_context *brw, - void *mem_ctx, - const struct brw_wm_prog_key *key, - struct brw_wm_prog_data *prog_data, - struct gl_shader_program *shader_prog, - struct gl_fragment_program *fp, - bool dual_source_output) - : gen8_generator(brw, shader_prog, fp ? 
&fp->Base : NULL, mem_ctx), - key(key), prog_data(prog_data), - fp(fp), dual_source_output(dual_source_output) -{ -} - -gen8_fs_generator::~gen8_fs_generator() -{ -} - -void -gen8_fs_generator::generate_fb_write(fs_inst *ir) -{ - /* Disable the discard condition while setting up the header. */ - default_state.predicate = BRW_PREDICATE_NONE; - default_state.predicate_inverse = false; - default_state.flag_subreg_nr = 0; - - if (ir->header_present) { - /* The GPU will use the predicate on SENDC, unless the header is present. - */ - if (fp && fp->UsesKill) { - gen8_instruction *mov = - MOV(retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW), - brw_flag_reg(0, 1)); - gen8_set_mask_control(mov, BRW_MASK_DISABLE); - } - - gen8_instruction *mov = - MOV_RAW(brw_message_reg(ir->base_mrf), brw_vec8_grf(0, 0)); - gen8_set_exec_size(mov, BRW_EXECUTE_16); - - if (ir->target > 0 && key->replicate_alpha) { - /* Set "Source0 Alpha Present to RenderTarget" bit in the header. */ - gen8_instruction *inst = - OR(get_element_ud(brw_message_reg(ir->base_mrf), 0), - vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)), - brw_imm_ud(1 << 11)); - gen8_set_mask_control(inst, BRW_MASK_DISABLE); - } - - if (ir->target > 0) { - /* Set the render target index for choosing BLEND_STATE. */ - MOV_RAW(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, ir->base_mrf, 2), - brw_imm_ud(ir->target)); - } - } - - /* Set the predicate back to get the conditional write if necessary for - * discards. - */ - default_state.predicate = ir->predicate; - default_state.predicate_inverse = ir->predicate_inverse; - default_state.flag_subreg_nr = ir->flag_subreg; - - gen8_instruction *inst = next_inst(BRW_OPCODE_SENDC); - gen8_set_dst(brw, inst, retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW)); - gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf)); - - /* Set up the "Message Specific Control" bits for the Data Port Message - * Descriptor. 
These are documented in the "Render Target Write" message's - * "Message Descriptor" documentation (vol5c.2). - */ - uint32_t msg_type; - /* Set the Message Type */ - if (this->dual_source_output) - msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; - else if (dispatch_width == 16) - msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; - else - msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; - - uint32_t msg_control = msg_type; - - /* Set "Last Render Target Select" on the final FB write. */ - if (ir->eot) - msg_control |= (1 << 4); /* Last Render Target Select */ - - uint32_t surf_index = - prog_data->binding_table.render_target_start + ir->target; - - gen8_set_dp_message(brw, inst, - GEN6_SFID_DATAPORT_RENDER_CACHE, - surf_index, - GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, - msg_control, - ir->mlen, - 0, - ir->header_present, - ir->eot); - - brw_mark_surface_used(&prog_data->base, surf_index); -} - -void -gen8_fs_generator::generate_linterp(fs_inst *inst, - struct brw_reg dst, - struct brw_reg *src) -{ - struct brw_reg delta_x = src[0]; - struct brw_reg delta_y = src[1]; - struct brw_reg interp = src[2]; - - (void) delta_y; - assert(delta_y.nr == delta_x.nr + 1); - PLN(dst, interp, delta_x); -} - -void -gen8_fs_generator::generate_tex(fs_inst *ir, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg sampler_index) -{ - int msg_type = -1; - int rlen = 4; - uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; - - assert(src.file == BRW_GENERAL_REGISTER_FILE); - - if (dispatch_width == 16 && !ir->force_uncompressed && !ir->force_sechalf) - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - - switch (ir->opcode) { - case SHADER_OPCODE_TEX: - if (ir->shadow_compare) { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE; - } - break; - case FS_OPCODE_TXB: - if (ir->shadow_compare) { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE; - } else { - 
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; - } - break; - case SHADER_OPCODE_TXL: - if (ir->shadow_compare) { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; - } - break; - case SHADER_OPCODE_TXS: - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; - break; - case SHADER_OPCODE_TXD: - if (ir->shadow_compare) { - msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; - } - break; - case SHADER_OPCODE_TXF: - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; - break; - case SHADER_OPCODE_TXF_CMS: - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; - break; - case SHADER_OPCODE_TXF_UMS: - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS; - break; - case SHADER_OPCODE_TXF_MCS: - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; - break; - case SHADER_OPCODE_LOD: - msg_type = GEN5_SAMPLER_MESSAGE_LOD; - break; - case SHADER_OPCODE_TG4: - if (ir->shadow_compare) { - assert(brw->gen >= 7); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; - } else { - assert(brw->gen >= 6); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; - } - break; - case SHADER_OPCODE_TG4_OFFSET: - assert(brw->gen >= 7); - if (ir->shadow_compare) { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; - } else { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; - } - break; - default: - unreachable("not reached"); - } - assert(msg_type != -1); - - if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { - rlen = 8; - dst = vec16(dst); - } - - assert(sampler_index.file == BRW_IMMEDIATE_VALUE); - assert(sampler_index.type == BRW_REGISTER_TYPE_UD); - - uint32_t sampler = sampler_index.dw1.ud; - - if (ir->header_present) { - /* The send-from-GRF for SIMD16 texturing with a header has an extra - * hardware register allocated to it, which we need to skip over (since - * our coordinates in the payload are in the even-numbered registers, - * and the header comes right before the first one. 
- */ - if (dispatch_width == 16) - src.nr++; - - unsigned save_exec_size = default_state.exec_size; - default_state.exec_size = BRW_EXECUTE_8; - - MOV_RAW(src, brw_vec8_grf(0, 0)); - - if (ir->texture_offset) { - /* Set the texel offset bits. */ - MOV_RAW(retype(brw_vec1_grf(src.nr, 2), BRW_REGISTER_TYPE_UD), - brw_imm_ud(ir->texture_offset)); - } - - if (sampler >= 16) { - /* The "Sampler Index" field can only store values between 0 and 15. - * However, we can add an offset to the "Sampler State Pointer" - * field, effectively selecting a different set of 16 samplers. - * - * The "Sampler State Pointer" needs to be aligned to a 32-byte - * offset, and each sampler state is only 16-bytes, so we can't - * exclusively use the offset - we have to use both. - */ - const int sampler_state_size = 16; /* 16 bytes */ - gen8_instruction *add = - ADD(get_element_ud(src, 3), - get_element_ud(brw_vec8_grf(0, 0), 3), - brw_imm_ud(16 * (sampler / 16) * sampler_state_size)); - gen8_set_mask_control(add, BRW_MASK_DISABLE); - } - - default_state.exec_size = save_exec_size; - } - - uint32_t surf_index = - prog_data->base.binding_table.texture_start + sampler; - - gen8_instruction *inst = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, inst, dst); - gen8_set_src0(brw, inst, src); - gen8_set_sampler_message(brw, inst, - surf_index, - sampler % 16, - msg_type, - rlen, - ir->mlen, - ir->header_present, - simd_mode); - - brw_mark_surface_used(&prog_data->base, surf_index); -} - - -/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input - * looking like: - * - * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br - * - * and we're trying to produce: - * - * DDX DDY - * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) - * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) - * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) - * (ss0.br - ss0.bl) (ss0.tr - ss0.br) - * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) - * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) - * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) - * (ss1.br - 
ss1.bl) (ss1.tr - ss1.br) - * - * and add another set of two more subspans if in 16-pixel dispatch mode. - * - * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result - * for each pair, and vertstride = 2 jumps us 2 elements after processing a - * pair. But for DDY, it's harder, as we want to produce the pairs swizzled - * between each other. We could probably do it like ddx and swizzle the right - * order later, but bail for now and just produce - * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) - */ -void -gen8_fs_generator::generate_ddx(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src) -{ - unsigned vstride, width; - - if (key->high_quality_derivatives) { - /* Produce accurate derivatives. */ - vstride = BRW_VERTICAL_STRIDE_2; - width = BRW_WIDTH_2; - } else { - /* Replicate the derivative at the top-left pixel to other pixels. */ - vstride = BRW_VERTICAL_STRIDE_4; - width = BRW_WIDTH_4; - } - - struct brw_reg src0 = brw_reg(src.file, src.nr, 1, - BRW_REGISTER_TYPE_F, - vstride, - width, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - struct brw_reg src1 = brw_reg(src.file, src.nr, 0, - BRW_REGISTER_TYPE_F, - vstride, - width, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - ADD(dst, src0, negate(src1)); -} - -/* The negate_value boolean is used to negate the derivative computation for - * FBOs, since they place the origin at the upper left instead of the lower - * left. - */ -void -gen8_fs_generator::generate_ddy(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src, - bool negate_value) -{ - unsigned hstride; - unsigned src0_swizzle; - unsigned src1_swizzle; - unsigned src1_subnr; - - if (key->high_quality_derivatives) { - /* Produce accurate derivatives. 
*/ - hstride = BRW_HORIZONTAL_STRIDE_1; - src0_swizzle = BRW_SWIZZLE_XYXY; - src1_swizzle = BRW_SWIZZLE_ZWZW; - src1_subnr = 0; - - default_state.access_mode = BRW_ALIGN_16; - } else { - /* Replicate the derivative at the top-left pixel to other pixels. */ - hstride = BRW_HORIZONTAL_STRIDE_0; - src0_swizzle = BRW_SWIZZLE_XYZW; - src1_swizzle = BRW_SWIZZLE_XYZW; - src1_subnr = 2; - } - - struct brw_reg src0 = brw_reg(src.file, src.nr, 0, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_4, - BRW_WIDTH_4, - hstride, - src0_swizzle, WRITEMASK_XYZW); - struct brw_reg src1 = brw_reg(src.file, src.nr, src1_subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_4, - BRW_WIDTH_4, - hstride, - src1_swizzle, WRITEMASK_XYZW); - - if (negate_value) - ADD(dst, src1, negate(src0)); - else - ADD(dst, src0, negate(src1)); - - default_state.access_mode = BRW_ALIGN_1; -} - -void -gen8_fs_generator::generate_scratch_write(fs_inst *ir, struct brw_reg src) -{ - MOV(retype(brw_message_reg(ir->base_mrf + 1), BRW_REGISTER_TYPE_UD), - retype(src, BRW_REGISTER_TYPE_UD)); - - struct brw_reg mrf = - retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD); - - const int num_regs = dispatch_width / 8; - - uint32_t msg_control; - if (num_regs == 1) - msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; - else - msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; - - /* Set up the message header. This is g0, with g0.2 filled with - * the offset. We don't want to leave our offset around in g0 or - * it'll screw up texture samples, so set it up inside the message - * reg. 
- */ - unsigned save_exec_size = default_state.exec_size; - default_state.exec_size = BRW_EXECUTE_8; - - MOV_RAW(mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - /* set message header global offset field (reg 0, element 2) */ - MOV_RAW(get_element_ud(mrf, 2), brw_imm_ud(ir->offset / 16)); - - struct brw_reg dst; - if (dispatch_width == 16) - dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); - else - dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); - - default_state.exec_size = BRW_EXECUTE_16; - - gen8_instruction *send = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, send, dst); - gen8_set_src0(brw, send, mrf); - gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE, - 255, /* binding table index: stateless access */ - GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, - msg_control, - 1 + num_regs, /* mlen */ - 0, /* rlen */ - true, /* header present */ - false); /* EOT */ - - default_state.exec_size = save_exec_size; -} - -void -gen8_fs_generator::generate_scratch_read(fs_inst *ir, struct brw_reg dst) -{ - struct brw_reg mrf = - retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD); - - const int num_regs = dispatch_width / 8; - - uint32_t msg_control; - if (num_regs == 1) - msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; - else - msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; - - unsigned save_exec_size = default_state.exec_size; - default_state.exec_size = BRW_EXECUTE_8; - - MOV_RAW(mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - /* set message header global offset field (reg 0, element 2) */ - MOV_RAW(get_element_ud(mrf, 2), brw_imm_ud(ir->offset / 16)); - - gen8_instruction *send = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, send, retype(dst, BRW_REGISTER_TYPE_UW)); - gen8_set_src0(brw, send, mrf); - gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE, - 255, /* binding table index: stateless access */ - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, - msg_control, - 1, /* mlen */ - num_regs, /* rlen 
*/ - true, /* header present */ - false); /* EOT */ - - default_state.exec_size = save_exec_size; -} - -void -gen8_fs_generator::generate_scratch_read_gen7(fs_inst *ir, struct brw_reg dst) -{ - unsigned save_exec_size = default_state.exec_size; - gen8_instruction *send = next_inst(BRW_OPCODE_SEND); - - int num_regs = dispatch_width / 8; - - /* According to the docs, offset is "A 12-bit HWord offset into the memory - * Immediate Memory buffer as specified by binding table 0xFF." An HWORD - * is 32 bytes, which happens to be the size of a register. - */ - int offset = ir->offset / REG_SIZE; - - /* The HW requires that the header is present; this is to get the g0.5 - * scratch offset. - */ - gen8_set_src0(brw, send, brw_vec8_grf(0, 0)); - gen8_set_dst(brw, send, retype(dst, BRW_REGISTER_TYPE_UW)); - gen8_set_dp_scratch_message(brw, send, - false, /* scratch read */ - false, /* OWords */ - false, /* invalidate after read */ - num_regs, - offset, - 1, /* mlen - just g0 */ - num_regs, /* rlen */ - true, /* header present */ - false); /* EOT */ - - default_state.exec_size = save_exec_size; -} - -void -gen8_fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) -{ - assert(inst->mlen == 0); - - assert(index.file == BRW_IMMEDIATE_VALUE && - index.type == BRW_REGISTER_TYPE_UD); - uint32_t surf_index = index.dw1.ud; - - assert(offset.file == BRW_GENERAL_REGISTER_FILE); - /* Reference only the dword we need lest we anger validate_reg() with - * reg.width > reg.execszie. - */ - offset = brw_vec1_grf(offset.nr, 0); - - gen8_instruction *send = next_inst(BRW_OPCODE_SEND); - gen8_set_mask_control(send, BRW_MASK_DISABLE); - - /* We use the SIMD4x2 mode because we want to end up with 4 constants in - * the destination loaded consecutively from the same offset (which appears - * in the first component, and the rest are ignored). 
- */ - dst.width = BRW_WIDTH_4; - gen8_set_dst(brw, send, dst); - gen8_set_src0(brw, send, offset); - gen8_set_sampler_message(brw, send, - surf_index, - 0, /* The LD message ignores the sampler unit. */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - 1, /* rlen */ - 1, /* mlen */ - false, /* no header */ - BRW_SAMPLER_SIMD_MODE_SIMD4X2); - - brw_mark_surface_used(&prog_data->base, surf_index); -} - -void -gen8_fs_generator::generate_varying_pull_constant_load(fs_inst *ir, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) -{ - /* Varying-offset pull constant loads are treated as a normal expression on - * gen7, so the fact that it's a send message is hidden at the IR level. - */ - assert(!ir->header_present); - assert(!ir->mlen); - - assert(index.file == BRW_IMMEDIATE_VALUE && - index.type == BRW_REGISTER_TYPE_UD); - uint32_t surf_index = index.dw1.ud; - - uint32_t simd_mode, rlen, mlen; - if (dispatch_width == 16) { - mlen = 2; - rlen = 8; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } else { - mlen = 1; - rlen = 4; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; - } - - gen8_instruction *send = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, send, dst); - gen8_set_src0(brw, send, offset); - gen8_set_sampler_message(brw, send, - surf_index, - 0, /* The LD message ignore the sampler unit. */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - rlen, /* rlen */ - mlen, /* mlen */ - false, /* no header */ - simd_mode); - - brw_mark_surface_used(&prog_data->base, surf_index); -} - -/** - * Cause the current pixel/sample mask (from R1.7 bits 15:0) to be transferred - * into the flags register (f0.0). 
- */ -void -gen8_fs_generator::generate_mov_dispatch_to_flags(fs_inst *ir) -{ - struct brw_reg flags = brw_flag_reg(0, ir->flag_subreg); - struct brw_reg dispatch_mask = - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); - - gen8_instruction *mov = MOV(flags, dispatch_mask); - gen8_set_mask_control(mov, BRW_MASK_DISABLE); -} - -void -gen8_fs_generator::generate_discard_jump(fs_inst *ir) -{ - /* This HALT will be patched up at FB write time to point UIP at the end of - * the program, and at brw_uip_jip() JIP will be set to the end of the - * current block (or the program). - */ - discard_halt_patches.push_tail(new(mem_ctx) ip_record(nr_inst)); - - HALT(); -} - -bool -gen8_fs_generator::patch_discard_jumps_to_fb_writes() -{ - if (discard_halt_patches.is_empty()) - return false; - - /* There is a somewhat strange undocumented requirement of using - * HALT, according to the simulator. If some channel has HALTed to - * a particular UIP, then by the end of the program, every channel - * must have HALTed to that UIP. Furthermore, the tracking is a - * stack, so you can't do the final halt of a UIP after starting - * halting to a new UIP. - * - * Symptoms of not emitting this instruction on actual hardware - * included GPU hangs and sparkly rendering on the piglit discard - * tests. - */ - gen8_instruction *last_halt = HALT(); - gen8_set_uip(last_halt, 16); - gen8_set_jip(last_halt, 16); - - int ip = nr_inst; - - foreach_in_list(ip_record, patch_ip, &discard_halt_patches) { - gen8_instruction *patch = &store[patch_ip->ip]; - assert(gen8_opcode(patch) == BRW_OPCODE_HALT); - - /* HALT takes an instruction distance from the pre-incremented IP. */ - gen8_set_uip(patch, (ip - patch_ip->ip) * 16); - } - - this->discard_halt_patches.make_empty(); - return true; -} - -/** - * Sets the first dword of a vgrf for simd4x2 uniform pull constant - * sampler LD messages. 
- * - * We don't want to bake it into the send message's code generation because - * that means we don't get a chance to schedule the instruction. - */ -void -gen8_fs_generator::generate_set_simd4x2_offset(fs_inst *ir, - struct brw_reg dst, - struct brw_reg value) -{ - assert(value.file == BRW_IMMEDIATE_VALUE); - MOV_RAW(retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value); -} - -/** - * Sets vstride=16, width=8, hstride=2 or vstride=0, width=1, hstride=0 - * (when mask is passed as a uniform) of register mask before moving it - * to register dst. - */ -void -gen8_fs_generator::generate_set_omask(fs_inst *inst, - struct brw_reg dst, - struct brw_reg mask) -{ - assert(dst.type == BRW_REGISTER_TYPE_UW); - - if (dispatch_width == 16) - dst = vec16(dst); - - if (mask.vstride == BRW_VERTICAL_STRIDE_8 && - mask.width == BRW_WIDTH_8 && - mask.hstride == BRW_HORIZONTAL_STRIDE_1) { - mask = stride(mask, 16, 8, 2); - } else { - assert(mask.vstride == BRW_VERTICAL_STRIDE_0 && - mask.width == BRW_WIDTH_1 && - mask.hstride == BRW_HORIZONTAL_STRIDE_0); - } - - gen8_instruction *mov = MOV(dst, retype(mask, dst.type)); - gen8_set_mask_control(mov, BRW_MASK_DISABLE); -} - -/** - * Do a special ADD with vstride=1, width=4, hstride=0 for src1. 
- */ -void -gen8_fs_generator::generate_set_sample_id(fs_inst *ir, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) -{ - assert(dst.type == BRW_REGISTER_TYPE_D || dst.type == BRW_REGISTER_TYPE_UD); - assert(src0.type == BRW_REGISTER_TYPE_D || src0.type == BRW_REGISTER_TYPE_UD); - - struct brw_reg reg = retype(stride(src1, 1, 4, 0), BRW_REGISTER_TYPE_UW); - - unsigned save_exec_size = default_state.exec_size; - default_state.exec_size = BRW_EXECUTE_8; - - gen8_instruction *add = ADD(dst, src0, reg); - gen8_set_mask_control(add, BRW_MASK_DISABLE); - if (dispatch_width == 16) { - add = ADD(offset(dst, 1), offset(src0, 1), suboffset(reg, 2)); - gen8_set_mask_control(add, BRW_MASK_DISABLE); - } - - default_state.exec_size = save_exec_size; -} - -/** - * Change the register's data type from UD to HF, doubling the strides in order - * to compensate for halving the data type width. - */ -static struct brw_reg -ud_reg_to_hf(struct brw_reg r) -{ - assert(r.type == BRW_REGISTER_TYPE_UD); - r.type = BRW_REGISTER_TYPE_HF; - - /* The BRW_*_STRIDE enums are defined so that incrementing the field - * doubles the real stride. - */ - if (r.hstride != 0) - ++r.hstride; - if (r.vstride != 0) - ++r.vstride; - - return r; -} - -void -gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst, - struct brw_reg dst, - struct brw_reg x, - struct brw_reg y) -{ - assert(dst.type == BRW_REGISTER_TYPE_UD); - assert(x.type == BRW_REGISTER_TYPE_F); - assert(y.type == BRW_REGISTER_TYPE_F); - - struct brw_reg dst_hf = ud_reg_to_hf(dst); - - /* Give each 32-bit channel of dst the form below , where "." means - * unchanged. 
- * 0x....hhhh - */ - MOV(dst_hf, y); - - /* Now the form: - * 0xhhhh0000 - */ - SHL(dst, dst, brw_imm_ud(16u)); - - /* And, finally the form of packHalf2x16's output: - * 0xhhhhllll - */ - MOV(dst_hf, x); -} - -void -gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src) -{ - assert(dst.type == BRW_REGISTER_TYPE_F); - assert(src.type == BRW_REGISTER_TYPE_UD); - - struct brw_reg src_hf = ud_reg_to_hf(src); - - /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll. - * For the Y case, we wish to access only the upper word; therefore - * a 16-bit subregister offset is needed. - */ - assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X || - inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y); - if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y) - src_hf.subnr += 2; - - MOV(dst, src_hf); -} - -void -gen8_fs_generator::generate_untyped_atomic(fs_inst *ir, - struct brw_reg dst, - struct brw_reg atomic_op, - struct brw_reg surf_index) -{ - assert(atomic_op.file == BRW_IMMEDIATE_VALUE && - atomic_op.type == BRW_REGISTER_TYPE_UD && - surf_index.file == BRW_IMMEDIATE_VALUE && - surf_index.type == BRW_REGISTER_TYPE_UD); - assert((atomic_op.dw1.ud & ~0xf) == 0); - - unsigned msg_control = - atomic_op.dw1.ud | /* Atomic Operation Type: BRW_AOP_* */ - ((dispatch_width == 16 ? 
0 : 1) << 4) | /* SIMD Mode */ - (1 << 5); /* Return data expected */ - - gen8_instruction *inst = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD)); - gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf), - BRW_REGISTER_TYPE_UD)); - gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1, - surf_index.dw1.ud, - HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP, - msg_control, - ir->mlen, - dispatch_width / 8, - ir->header_present, - false); - - brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); -} - -void -gen8_fs_generator::generate_untyped_surface_read(fs_inst *ir, - struct brw_reg dst, - struct brw_reg surf_index) -{ - assert(surf_index.file == BRW_IMMEDIATE_VALUE && - surf_index.type == BRW_REGISTER_TYPE_UD); - - unsigned msg_control = 0xe | /* Enable only the R channel */ - ((dispatch_width == 16 ? 1 : 2) << 4); /* SIMD Mode */ - - gen8_instruction *inst = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD)); - gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf), - BRW_REGISTER_TYPE_UD)); - gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1, - surf_index.dw1.ud, - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ, - msg_control, - ir->mlen, - dispatch_width / 8, - ir->header_present, - false); - - brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); -} - -void -gen8_fs_generator::generate_code(exec_list *instructions) -{ - int start_offset = next_inst_offset; - - struct annotation_info annotation; - memset(&annotation, 0, sizeof(annotation)); - - cfg_t *cfg = NULL; - if (unlikely(INTEL_DEBUG & DEBUG_WM)) - cfg = new(mem_ctx) cfg_t(instructions); - - foreach_in_list(fs_inst, ir, instructions) { - struct brw_reg src[3], dst; - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) - annotate(brw, &annotation, cfg, ir, next_inst_offset); - - for (unsigned int i = 0; i < 3; i++) { - src[i] = brw_reg_from_fs_reg(&ir->src[i]); - - /* The accumulator result appears to get 
used for the - * conditional modifier generation. When negating a UD - * value, there is a 33rd bit generated for the sign in the - * accumulator value, so now you can't check, for example, - * equality with a 32-bit value. See piglit fs-op-neg-uvec4. - */ - assert(!ir->conditional_mod || - ir->src[i].type != BRW_REGISTER_TYPE_UD || - !ir->src[i].negate); - } - dst = brw_reg_from_fs_reg(&ir->dst); - - default_state.conditional_mod = ir->conditional_mod; - default_state.predicate = ir->predicate; - default_state.predicate_inverse = ir->predicate_inverse; - default_state.saturate = ir->saturate; - default_state.mask_control = ir->force_writemask_all; - default_state.flag_subreg_nr = ir->flag_subreg; - - if (dispatch_width == 16 && !ir->force_uncompressed && !ir->force_sechalf) - default_state.exec_size = BRW_EXECUTE_16; - else - default_state.exec_size = BRW_EXECUTE_8; - - if (ir->force_uncompressed || dispatch_width == 8) - default_state.qtr_control = GEN6_COMPRESSION_1Q; - else if (ir->force_sechalf) - default_state.qtr_control = GEN6_COMPRESSION_2Q; - else - default_state.qtr_control = GEN6_COMPRESSION_1H; - - switch (ir->opcode) { - case BRW_OPCODE_MOV: - MOV(dst, src[0]); - break; - case BRW_OPCODE_ADD: - ADD(dst, src[0], src[1]); - break; - case BRW_OPCODE_MUL: - MUL(dst, src[0], src[1]); - break; - case BRW_OPCODE_MACH: - MACH(dst, src[0], src[1]); - break; - - case BRW_OPCODE_MAD: - default_state.access_mode = BRW_ALIGN_16; - MAD(dst, src[0], src[1], src[2]); - default_state.access_mode = BRW_ALIGN_1; - break; - - case BRW_OPCODE_LRP: - default_state.access_mode = BRW_ALIGN_16; - LRP(dst, src[0], src[1], src[2]); - default_state.access_mode = BRW_ALIGN_1; - break; - - - case BRW_OPCODE_FRC: - FRC(dst, src[0]); - break; - case BRW_OPCODE_RNDD: - RNDD(dst, src[0]); - break; - case BRW_OPCODE_RNDE: - RNDE(dst, src[0]); - break; - case BRW_OPCODE_RNDZ: - RNDZ(dst, src[0]); - break; - - case BRW_OPCODE_AND: - AND(dst, src[0], src[1]); - break; - case 
BRW_OPCODE_OR: - OR(dst, src[0], src[1]); - break; - case BRW_OPCODE_XOR: - XOR(dst, src[0], src[1]); - break; - case BRW_OPCODE_NOT: - NOT(dst, src[0]); - break; - case BRW_OPCODE_ASR: - ASR(dst, src[0], src[1]); - break; - case BRW_OPCODE_SHR: - SHR(dst, src[0], src[1]); - break; - case BRW_OPCODE_SHL: - SHL(dst, src[0], src[1]); - break; - - case BRW_OPCODE_F32TO16: - MOV(retype(dst, BRW_REGISTER_TYPE_HF), src[0]); - break; - case BRW_OPCODE_F16TO32: - MOV(dst, retype(src[0], BRW_REGISTER_TYPE_HF)); - break; - - case BRW_OPCODE_CMP: - CMP(dst, ir->conditional_mod, src[0], src[1]); - break; - case BRW_OPCODE_SEL: - SEL(dst, src[0], src[1]); - break; - - case BRW_OPCODE_BFREV: - /* BFREV only supports UD type for src and dst. */ - BFREV(retype(dst, BRW_REGISTER_TYPE_UD), - retype(src[0], BRW_REGISTER_TYPE_UD)); - break; - - case BRW_OPCODE_FBH: - /* FBH only supports UD type for dst. */ - FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]); - break; - - case BRW_OPCODE_FBL: - /* FBL only supports UD type for dst. */ - FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]); - break; - - case BRW_OPCODE_CBIT: - /* CBIT only supports UD type for dst. 
*/ - CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]); - break; - - case BRW_OPCODE_ADDC: - ADDC(dst, src[0], src[1]); - break; - - case BRW_OPCODE_SUBB: - SUBB(dst, src[0], src[1]); - break; - - case BRW_OPCODE_BFE: - default_state.access_mode = BRW_ALIGN_16; - BFE(dst, src[0], src[1], src[2]); - default_state.access_mode = BRW_ALIGN_1; - break; - - case BRW_OPCODE_BFI1: - BFI1(dst, src[0], src[1]); - break; - - case BRW_OPCODE_BFI2: - default_state.access_mode = BRW_ALIGN_16; - BFI2(dst, src[0], src[1], src[2]); - default_state.access_mode = BRW_ALIGN_1; - break; - - case BRW_OPCODE_IF: - IF(BRW_PREDICATE_NORMAL); - break; - - case BRW_OPCODE_ELSE: - ELSE(); - break; - - case BRW_OPCODE_ENDIF: - ENDIF(); - break; - - case BRW_OPCODE_DO: - DO(); - break; - - case BRW_OPCODE_BREAK: - BREAK(); - break; - - case BRW_OPCODE_CONTINUE: - CONTINUE(); - break; - - case BRW_OPCODE_WHILE: - WHILE(); - break; - - case SHADER_OPCODE_RCP: - MATH(BRW_MATH_FUNCTION_INV, dst, src[0]); - break; - - case SHADER_OPCODE_RSQ: - MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]); - break; - - case SHADER_OPCODE_SQRT: - MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]); - break; - - case SHADER_OPCODE_EXP2: - MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]); - break; - - case SHADER_OPCODE_LOG2: - MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]); - break; - - case SHADER_OPCODE_SIN: - MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]); - break; - - case SHADER_OPCODE_COS: - MATH(BRW_MATH_FUNCTION_COS, dst, src[0]); - break; - - case SHADER_OPCODE_INT_QUOTIENT: - MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]); - break; - - case SHADER_OPCODE_INT_REMAINDER: - MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]); - break; - - case SHADER_OPCODE_POW: - MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]); - break; - - case FS_OPCODE_PIXEL_X: - case FS_OPCODE_PIXEL_Y: - unreachable("FS_OPCODE_PIXEL_X and FS_OPCODE_PIXEL_Y are only for Gen4-5."); - - case FS_OPCODE_CINTERP: - MOV(dst, src[0]); - break; - case 
FS_OPCODE_LINTERP: - generate_linterp(ir, dst, src); - break; - case SHADER_OPCODE_TEX: - case FS_OPCODE_TXB: - case SHADER_OPCODE_TXD: - case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_CMS: - case SHADER_OPCODE_TXF_UMS: - case SHADER_OPCODE_TXF_MCS: - case SHADER_OPCODE_TXL: - case SHADER_OPCODE_TXS: - case SHADER_OPCODE_LOD: - case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: - generate_tex(ir, dst, src[0], src[1]); - break; - - case FS_OPCODE_DDX: - generate_ddx(ir, dst, src[0]); - break; - case FS_OPCODE_DDY: - /* Make sure fp->UsesDFdy flag got set (otherwise there's no - * guarantee that key->render_to_fbo is set). - */ - assert(fp->UsesDFdy); - generate_ddy(ir, dst, src[0], key->render_to_fbo); - break; - - case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - generate_scratch_write(ir, src[0]); - break; - - case SHADER_OPCODE_GEN4_SCRATCH_READ: - generate_scratch_read(ir, dst); - break; - - case SHADER_OPCODE_GEN7_SCRATCH_READ: - generate_scratch_read_gen7(ir, dst); - break; - - case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: - generate_uniform_pull_constant_load(ir, dst, src[0], src[1]); - break; - - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - generate_varying_pull_constant_load(ir, dst, src[0], src[1]); - break; - - case FS_OPCODE_FB_WRITE: - generate_fb_write(ir); - break; - - case FS_OPCODE_MOV_DISPATCH_TO_FLAGS: - generate_mov_dispatch_to_flags(ir); - break; - - case FS_OPCODE_DISCARD_JUMP: - generate_discard_jump(ir); - break; - - case SHADER_OPCODE_SHADER_TIME_ADD: - unreachable("XXX: Missing Gen8 scalar support for INTEL_DEBUG=shader_time"); - - case SHADER_OPCODE_UNTYPED_ATOMIC: - generate_untyped_atomic(ir, dst, src[0], src[1]); - break; - - case SHADER_OPCODE_UNTYPED_SURFACE_READ: - generate_untyped_surface_read(ir, dst, src[0]); - break; - - case FS_OPCODE_SET_SIMD4X2_OFFSET: - generate_set_simd4x2_offset(ir, dst, src[0]); - break; - - case FS_OPCODE_SET_OMASK: - generate_set_omask(ir, dst, src[0]); - break; - - case FS_OPCODE_SET_SAMPLE_ID: - 
generate_set_sample_id(ir, dst, src[0], src[1]); - break; - - case FS_OPCODE_PACK_HALF_2x16_SPLIT: - generate_pack_half_2x16_split(ir, dst, src[0], src[1]); - break; - - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: - generate_unpack_half_2x16_split(ir, dst, src[0]); - break; - - case FS_OPCODE_PLACEHOLDER_HALT: - /* This is the place where the final HALT needs to be inserted if - * we've emitted any discards. If not, this will emit no code. - */ - if (!patch_discard_jumps_to_fb_writes()) { - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - annotation.ann_count--; - } - } - break; - - default: - if (ir->opcode < int(ARRAY_SIZE(opcode_descs))) { - _mesa_problem(ctx, "Unsupported opcode `%s' in FS", - opcode_descs[ir->opcode].name); - } else { - _mesa_problem(ctx, "Unsupported opcode %d in FS", ir->opcode); - } - abort(); - } - } - - patch_jump_targets(); - annotation_finalize(&annotation, next_inst_offset); - - int before_size = next_inst_offset - start_offset; - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - if (shader_prog) { - fprintf(stderr, - "Native code for %s fragment shader %d (SIMD%d dispatch):\n", - shader_prog->Label ? 
shader_prog->Label : "unnamed", - shader_prog->Name, dispatch_width); - } else if (fp) { - fprintf(stderr, - "Native code for fragment program %d (SIMD%d dispatch):\n", - prog->Id, dispatch_width); - } else { - fprintf(stderr, "Native code for blorp program (SIMD%d dispatch):\n", - dispatch_width); - } - fprintf(stderr, "SIMD%d shader: %d instructions.\n", - dispatch_width, before_size / 16); - - dump_assembly(store, annotation.ann_count, annotation.ann, brw, prog); - ralloc_free(annotation.ann); - } -} - -const unsigned * -gen8_fs_generator::generate_assembly(exec_list *simd8_instructions, - exec_list *simd16_instructions, - unsigned *assembly_size) -{ - assert(simd8_instructions || simd16_instructions); - - if (simd8_instructions) { - dispatch_width = 8; - generate_code(simd8_instructions); - } - - if (simd16_instructions) { - /* Align to a 64-byte boundary. */ - while (next_inst_offset % 64) - NOP(); - - /* Save off the start of this SIMD16 program */ - prog_data->prog_offset_16 = next_inst_offset; - - dispatch_width = 16; - generate_code(simd16_instructions); - } - - *assembly_size = next_inst_offset; - return (const unsigned *) store; -} diff --git a/src/mesa/drivers/dri/i965/gen8_generator.cpp b/src/mesa/drivers/dri/i965/gen8_generator.cpp deleted file mode 100644 index e837dc3e403..00000000000 --- a/src/mesa/drivers/dri/i965/gen8_generator.cpp +++ /dev/null @@ -1,620 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * 
paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** @file gen8_generator.cpp - * - * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer. - */ - -extern "C" { -#include "main/compiler.h" -#include "main/macros.h" -#include "brw_context.h" -} /* extern "C" */ - -#include "util/ralloc.h" -#include "brw_eu.h" -#include "brw_reg.h" -#include "gen8_generator.h" - -gen8_generator::gen8_generator(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - void *mem_ctx) - : shader_prog(shader_prog), prog(prog), brw(brw), mem_ctx(mem_ctx) -{ - ctx = &brw->ctx; - - memset(&default_state, 0, sizeof(default_state)); - default_state.mask_control = BRW_MASK_ENABLE; - - store_size = 1024; - store = rzalloc_array(mem_ctx, gen8_instruction, store_size); - nr_inst = 0; - next_inst_offset = 0; - - /* Set up the control flow stacks. 
*/ - if_stack_depth = 0; - if_stack_array_size = 16; - if_stack = rzalloc_array(mem_ctx, int, if_stack_array_size); - - loop_stack_depth = 0; - loop_stack_array_size = 16; - loop_stack = rzalloc_array(mem_ctx, int, loop_stack_array_size); -} - -gen8_generator::~gen8_generator() -{ -} - -gen8_instruction * -gen8_generator::next_inst(unsigned opcode) -{ - gen8_instruction *inst; - - if (nr_inst + 1 > unsigned(store_size)) { - store_size <<= 1; - store = reralloc(mem_ctx, store, gen8_instruction, store_size); - assert(store); - } - - next_inst_offset += 16; - inst = &store[nr_inst++]; - - memset(inst, 0, sizeof(gen8_instruction)); - - gen8_set_opcode(inst, opcode); - gen8_set_exec_size(inst, default_state.exec_size); - gen8_set_access_mode(inst, default_state.access_mode); - gen8_set_mask_control(inst, default_state.mask_control); - gen8_set_qtr_control(inst, default_state.qtr_control); - gen8_set_cond_modifier(inst, default_state.conditional_mod); - gen8_set_pred_control(inst, default_state.predicate); - gen8_set_pred_inv(inst, default_state.predicate_inverse); - gen8_set_saturate(inst, default_state.saturate); - gen8_set_flag_subreg_nr(inst, default_state.flag_subreg_nr); - return inst; -} - -#define ALU1(OP) \ -gen8_instruction * \ -gen8_generator::OP(struct brw_reg dst, struct brw_reg src) \ -{ \ - gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \ - gen8_set_dst(brw, inst, dst); \ - gen8_set_src0(brw, inst, src); \ - return inst; \ -} - -#define ALU2(OP) \ -gen8_instruction * \ -gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \ -{ \ - gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \ - gen8_set_dst(brw, inst, dst); \ - gen8_set_src0(brw, inst, s0); \ - gen8_set_src1(brw, inst, s1); \ - return inst; \ -} - -#define ALU2_ACCUMULATE(OP) \ -gen8_instruction * \ -gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \ -{ \ - gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \ - gen8_set_dst(brw, inst, 
dst); \ - gen8_set_src0(brw, inst, s0); \ - gen8_set_src1(brw, inst, s1); \ - gen8_set_acc_wr_control(inst, true); \ - return inst; \ -} - -#define ALU3(OP) \ -gen8_instruction * \ -gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \ - struct brw_reg s1, struct brw_reg s2) \ -{ \ - return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \ -} - -#define ALU3F(OP) \ -gen8_instruction * \ -gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \ - struct brw_reg s1, struct brw_reg s2) \ -{ \ - assert(dst.type == BRW_REGISTER_TYPE_F); \ - assert(s0.type == BRW_REGISTER_TYPE_F); \ - assert(s1.type == BRW_REGISTER_TYPE_F); \ - assert(s2.type == BRW_REGISTER_TYPE_F); \ - return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \ -} - -ALU2(ADD) -ALU2(AND) -ALU2(ASR) -ALU3(BFE) -ALU2(BFI1) -ALU3(BFI2) -ALU1(BFREV) -ALU1(CBIT) -ALU2_ACCUMULATE(ADDC) -ALU2_ACCUMULATE(SUBB) -ALU2(DP2) -ALU2(DP3) -ALU2(DP4) -ALU2(DPH) -ALU1(FBH) -ALU1(FBL) -ALU1(FRC) -ALU2(LINE) -ALU3F(LRP) -ALU3F(MAD) -ALU2(MUL) -ALU1(MOV) -ALU1(NOT) -ALU2(OR) -ALU2(PLN) -ALU1(RNDD) -ALU1(RNDE) -ALU1(RNDZ) -ALU2_ACCUMULATE(MAC) -ALU2_ACCUMULATE(MACH) -ALU2(SEL) -ALU2(SHL) -ALU2(SHR) -ALU2(XOR) - -gen8_instruction * -gen8_generator::CMP(struct brw_reg dst, unsigned conditional, - struct brw_reg src0, struct brw_reg src1) -{ - gen8_instruction *inst = next_inst(BRW_OPCODE_CMP); - gen8_set_cond_modifier(inst, conditional); - /* The CMP instruction appears to behave erratically for floating point - * sources unless the destination type is also float. Overriding it to - * match src0 makes it work in all cases. 
- */ - dst.type = src0.type; - gen8_set_dst(brw, inst, dst); - gen8_set_src0(brw, inst, src0); - gen8_set_src1(brw, inst, src1); - return inst; -} - -static int -get_3src_subreg_nr(struct brw_reg reg) -{ - if (reg.vstride == BRW_VERTICAL_STRIDE_0) { - assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle)); - return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0); - } else { - return reg.subnr / 4; - } -} - -gen8_instruction * -gen8_generator::alu3(unsigned opcode, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1, - struct brw_reg src2) -{ - /* MRFs haven't existed since Gen7, so we better not be using them. */ - if (dst.file == BRW_MESSAGE_REGISTER_FILE) { - dst.file = BRW_GENERAL_REGISTER_FILE; - dst.nr += GEN7_MRF_HACK_START; - } - - gen8_instruction *inst = next_inst(opcode); - assert(gen8_access_mode(inst) == BRW_ALIGN_16); - - assert(dst.file == BRW_GENERAL_REGISTER_FILE); - assert(dst.nr < 128); - assert(dst.address_mode == BRW_ADDRESS_DIRECT); - assert(dst.type == BRW_REGISTER_TYPE_F || - dst.type == BRW_REGISTER_TYPE_D || - dst.type == BRW_REGISTER_TYPE_UD); - gen8_set_dst_3src_reg_nr(inst, dst.nr); - gen8_set_dst_3src_subreg_nr(inst, dst.subnr / 16); - gen8_set_dst_3src_writemask(inst, dst.dw1.bits.writemask); - - assert(src0.file == BRW_GENERAL_REGISTER_FILE); - assert(src0.address_mode == BRW_ADDRESS_DIRECT); - assert(src0.nr < 128); - gen8_set_src0_3src_swizzle(inst, src0.dw1.bits.swizzle); - gen8_set_src0_3src_subreg_nr(inst, get_3src_subreg_nr(src0)); - gen8_set_src0_3src_rep_ctrl(inst, src0.vstride == BRW_VERTICAL_STRIDE_0); - gen8_set_src0_3src_reg_nr(inst, src0.nr); - gen8_set_src0_3src_abs(inst, src0.abs); - gen8_set_src0_3src_negate(inst, src0.negate); - - assert(src1.file == BRW_GENERAL_REGISTER_FILE); - assert(src1.address_mode == BRW_ADDRESS_DIRECT); - assert(src1.nr < 128); - gen8_set_src1_3src_swizzle(inst, src1.dw1.bits.swizzle); - gen8_set_src1_3src_subreg_nr(inst, get_3src_subreg_nr(src1)); - 
gen8_set_src1_3src_rep_ctrl(inst, src1.vstride == BRW_VERTICAL_STRIDE_0); - gen8_set_src1_3src_reg_nr(inst, src1.nr); - gen8_set_src1_3src_abs(inst, src1.abs); - gen8_set_src1_3src_negate(inst, src1.negate); - - assert(src2.file == BRW_GENERAL_REGISTER_FILE); - assert(src2.address_mode == BRW_ADDRESS_DIRECT); - assert(src2.nr < 128); - gen8_set_src2_3src_swizzle(inst, src2.dw1.bits.swizzle); - gen8_set_src2_3src_subreg_nr(inst, get_3src_subreg_nr(src2)); - gen8_set_src2_3src_rep_ctrl(inst, src2.vstride == BRW_VERTICAL_STRIDE_0); - gen8_set_src2_3src_reg_nr(inst, src2.nr); - gen8_set_src2_3src_abs(inst, src2.abs); - gen8_set_src2_3src_negate(inst, src2.negate); - - /* Set both the source and destination types based on dst.type, ignoring - * the source register types. The MAD and LRP emitters both ensure that - * all register types are float. The BFE and BFI2 emitters, however, may - * send us mixed D and UD source types and want us to ignore that. - */ - switch (dst.type) { - case BRW_REGISTER_TYPE_F: - gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_F); - gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_F); - break; - case BRW_REGISTER_TYPE_D: - gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_D); - gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_D); - break; - case BRW_REGISTER_TYPE_UD: - gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_UD); - gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_UD); - break; - } - - return inst; -} - -gen8_instruction * -gen8_generator::math(unsigned math_function, - struct brw_reg dst, - struct brw_reg src0) -{ - gen8_instruction *inst = next_inst(BRW_OPCODE_MATH); - - assert(src0.hstride == 0 || src0.hstride == dst.hstride); - - gen8_set_math_function(inst, math_function); - gen8_set_dst(brw, inst, dst); - gen8_set_src0(brw, inst, src0); - return inst; -} - -gen8_instruction * -gen8_generator::MATH(unsigned math_function, - struct brw_reg dst, - struct brw_reg src0) -{ - assert(src0.type == BRW_REGISTER_TYPE_F); - gen8_instruction *inst = math(math_function, dst, 
src0); - return inst; -} - -gen8_instruction * -gen8_generator::MATH(unsigned math_function, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) -{ - bool int_math = - math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || - math_function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || - math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER; - - if (int_math) { - assert(src0.type != BRW_REGISTER_TYPE_F); - assert(src1.type != BRW_REGISTER_TYPE_F); - } else { - assert(src0.type == BRW_REGISTER_TYPE_F); - } - - gen8_instruction *inst = math(math_function, dst, src0); - gen8_set_src1(brw, inst, src1); - return inst; -} - -gen8_instruction * -gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0) -{ - gen8_instruction *inst = next_inst(BRW_OPCODE_MOV); - gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD)); - gen8_set_src0(brw, inst, retype(src0, BRW_REGISTER_TYPE_UD)); - gen8_set_mask_control(inst, BRW_MASK_DISABLE); - - return inst; -} - - -gen8_instruction * -gen8_generator::NOP() -{ - return next_inst(BRW_OPCODE_NOP); -} - -void -gen8_generator::push_if_stack(gen8_instruction *inst) -{ - if_stack[if_stack_depth] = inst - store; - - ++if_stack_depth; - if (if_stack_array_size <= if_stack_depth) { - if_stack_array_size *= 2; - if_stack = reralloc(mem_ctx, if_stack, int, if_stack_array_size); - } -} - -gen8_instruction * -gen8_generator::pop_if_stack() -{ - --if_stack_depth; - return &store[if_stack[if_stack_depth]]; -} - -/** - * Patch the IF and ELSE instructions to set the jump offsets (JIP and UIP.) 
- */ -void -gen8_generator::patch_IF_ELSE(gen8_instruction *if_inst, - gen8_instruction *else_inst, - gen8_instruction *endif_inst) -{ - assert(if_inst != NULL && gen8_opcode(if_inst) == BRW_OPCODE_IF); - assert(else_inst == NULL || gen8_opcode(else_inst) == BRW_OPCODE_ELSE); - assert(endif_inst != NULL && gen8_opcode(endif_inst) == BRW_OPCODE_ENDIF); - - gen8_set_exec_size(endif_inst, gen8_exec_size(if_inst)); - - if (else_inst == NULL) { - /* Patch IF -> ENDIF */ - gen8_set_jip(if_inst, 16 * (endif_inst - if_inst)); - gen8_set_uip(if_inst, 16 * (endif_inst - if_inst)); - } else { - gen8_set_exec_size(else_inst, gen8_exec_size(if_inst)); - - /* Patch IF -> ELSE and ELSE -> ENDIF: - * - * The IF's JIP should point at the instruction after the ELSE. - * The IF's UIP should point to the ENDIF. - * - * Both are expressed in bytes, hence the multiply by 16...128-bits. - */ - gen8_set_jip(if_inst, 16 * (else_inst - if_inst + 1)); - gen8_set_uip(if_inst, 16 * (endif_inst - if_inst)); - - /* Patch ELSE -> ENDIF: - * - * Since we don't set branch_ctrl, both JIP and UIP point to ENDIF. 
- */ - gen8_set_jip(else_inst, 16 * (endif_inst - else_inst)); - gen8_set_uip(else_inst, 16 * (endif_inst - else_inst)); - } - gen8_set_jip(endif_inst, 16); -} - -gen8_instruction * -gen8_generator::IF(unsigned predicate) -{ - gen8_instruction *inst = next_inst(BRW_OPCODE_IF); - gen8_set_dst(brw, inst, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - gen8_set_src0(brw, inst, brw_imm_d(0)); - gen8_set_exec_size(inst, default_state.exec_size); - gen8_set_pred_control(inst, predicate); - gen8_set_mask_control(inst, BRW_MASK_ENABLE); - push_if_stack(inst); - - return inst; -} - -gen8_instruction * -gen8_generator::ELSE() -{ - gen8_instruction *inst = next_inst(BRW_OPCODE_ELSE); - gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - gen8_set_src0(brw, inst, brw_imm_d(0)); - gen8_set_mask_control(inst, BRW_MASK_ENABLE); - push_if_stack(inst); - return inst; -} - -gen8_instruction * -gen8_generator::ENDIF() -{ - gen8_instruction *if_inst = NULL; - gen8_instruction *else_inst = NULL; - - gen8_instruction *tmp = pop_if_stack(); - if (gen8_opcode(tmp) == BRW_OPCODE_ELSE) { - else_inst = tmp; - tmp = pop_if_stack(); - } - assert(gen8_opcode(tmp) == BRW_OPCODE_IF); - if_inst = tmp; - - gen8_instruction *endif_inst = next_inst(BRW_OPCODE_ENDIF); - gen8_set_mask_control(endif_inst, BRW_MASK_ENABLE); - gen8_set_src0(brw, endif_inst, brw_imm_d(0)); - patch_IF_ELSE(if_inst, else_inst, endif_inst); - - return endif_inst; -} - -unsigned -gen8_generator::next_ip(unsigned ip) const -{ - return ip + 16; -} - -unsigned -gen8_generator::find_next_block_end(unsigned start) const -{ - for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) { - gen8_instruction *inst = &store[ip / 16]; - - switch (gen8_opcode(inst)) { - case BRW_OPCODE_ENDIF: - case BRW_OPCODE_ELSE: - case BRW_OPCODE_WHILE: - case BRW_OPCODE_HALT: - return ip; - } - } - - return 0; -} - -/* There is no DO instruction on Gen6+, so to find the end of the loop - * we have to see if 
the loop is jumping back before our start - * instruction. - */ -unsigned -gen8_generator::find_loop_end(unsigned start) const -{ - /* Always start after the instruction (such as a WHILE) we're trying to fix - * up. - */ - for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) { - gen8_instruction *inst = &store[ip / 16]; - - if (gen8_opcode(inst) == BRW_OPCODE_WHILE) { - if (ip + gen8_jip(inst) <= start) - return ip; - } - } - unreachable("not reached"); -} - -/* After program generation, go back and update the UIP and JIP of - * BREAK, CONT, and HALT instructions to their correct locations. - */ -void -gen8_generator::patch_jump_targets() -{ - for (unsigned ip = 0; ip < next_inst_offset; ip = next_ip(ip)) { - gen8_instruction *inst = &store[ip / 16]; - - int block_end_ip = find_next_block_end(ip); - switch (gen8_opcode(inst)) { - case BRW_OPCODE_BREAK: - assert(block_end_ip != 0); - gen8_set_jip(inst, block_end_ip - ip); - gen8_set_uip(inst, find_loop_end(ip) - ip); - assert(gen8_uip(inst) != 0); - assert(gen8_jip(inst) != 0); - break; - case BRW_OPCODE_CONTINUE: - assert(block_end_ip != 0); - gen8_set_jip(inst, block_end_ip - ip); - gen8_set_uip(inst, find_loop_end(ip) - ip); - assert(gen8_uip(inst) != 0); - assert(gen8_jip(inst) != 0); - break; - case BRW_OPCODE_ENDIF: - if (block_end_ip == 0) - gen8_set_jip(inst, 16); - else - gen8_set_jip(inst, block_end_ip - ip); - break; - case BRW_OPCODE_HALT: - /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19): - * - * "In case of the halt instruction not inside any conditional - * code block, the value of <JIP> and <UIP> should be the - * same. In case of the halt instruction inside conditional code - * block, the <UIP> should be the end of the program, and the - * <JIP> should be end of the most inner conditional code block." - * - * The uip will have already been set by whoever set up the - * instruction. 
- */ - if (block_end_ip == 0) { - gen8_set_jip(inst, gen8_uip(inst)); - } else { - gen8_set_jip(inst, block_end_ip - ip); - } - assert(gen8_uip(inst) != 0); - assert(gen8_jip(inst) != 0); - break; - } - } -} - -void -gen8_generator::DO() -{ - if (loop_stack_array_size < loop_stack_depth) { - loop_stack_array_size *= 2; - loop_stack = reralloc(mem_ctx, loop_stack, int, loop_stack_array_size); - } - loop_stack[loop_stack_depth++] = nr_inst; -} - -gen8_instruction * -gen8_generator::BREAK() -{ - gen8_instruction *inst = next_inst(BRW_OPCODE_BREAK); - gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - gen8_set_src0(brw, inst, brw_imm_d(0)); - gen8_set_exec_size(inst, default_state.exec_size); - return inst; -} - -gen8_instruction * -gen8_generator::CONTINUE() -{ - gen8_instruction *inst = next_inst(BRW_OPCODE_CONTINUE); - gen8_set_dst(brw, inst, brw_ip_reg()); - gen8_set_src0(brw, inst, brw_imm_d(0)); - gen8_set_exec_size(inst, default_state.exec_size); - return inst; -} - -gen8_instruction * -gen8_generator::WHILE() -{ - gen8_instruction *do_inst = &store[loop_stack[--loop_stack_depth]]; - gen8_instruction *while_inst = next_inst(BRW_OPCODE_WHILE); - - gen8_set_dst(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - gen8_set_src0(brw, while_inst, brw_imm_d(0)); - gen8_set_jip(while_inst, 16 * (do_inst - while_inst)); - gen8_set_exec_size(while_inst, default_state.exec_size); - - return while_inst; -} - -gen8_instruction * -gen8_generator::HALT() -{ - gen8_instruction *inst = next_inst(BRW_OPCODE_HALT); - gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - gen8_set_src0(brw, inst, brw_imm_d(0)); - gen8_set_exec_size(inst, default_state.exec_size); - gen8_set_mask_control(inst, BRW_MASK_DISABLE); - return inst; -} diff --git a/src/mesa/drivers/dri/i965/gen8_generator.h b/src/mesa/drivers/dri/i965/gen8_generator.h deleted file mode 100644 index f91044a34a0..00000000000 --- a/src/mesa/drivers/dri/i965/gen8_generator.h 
+++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file gen8_generator.h - * - * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer. - */ - -#pragma once - -extern "C" { -#include "main/macros.h" -} /* extern "C" */ - -#include "gen8_instruction.h" - -class gen8_generator { -public: - gen8_generator(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - void *mem_ctx); - ~gen8_generator(); - - /** - * Instruction emitters. 
- * @{ - */ - #define ALU1(OP) \ - gen8_instruction *OP(struct brw_reg dst, struct brw_reg src); - #define ALU2(OP) \ - gen8_instruction *OP(struct brw_reg d, struct brw_reg, struct brw_reg); - #define ALU3(OP) \ - gen8_instruction *OP(struct brw_reg d, \ - struct brw_reg, struct brw_reg, struct brw_reg); - ALU2(ADD) - ALU2(AND) - ALU2(ASR) - ALU3(BFE) - ALU2(BFI1) - ALU3(BFI2) - ALU1(F32TO16) - ALU1(F16TO32) - ALU1(BFREV) - ALU1(CBIT) - ALU2(ADDC) - ALU2(SUBB) - ALU2(DP2) - ALU2(DP3) - ALU2(DP4) - ALU2(DPH) - ALU1(FBH) - ALU1(FBL) - ALU1(FRC) - ALU2(LINE) - ALU3(LRP) - ALU2(MAC) - ALU2(MACH) - ALU3(MAD) - ALU2(MUL) - ALU1(MOV) - ALU1(MOV_RAW) - ALU1(NOT) - ALU2(OR) - ALU2(PLN) - ALU1(RNDD) - ALU1(RNDE) - ALU1(RNDZ) - ALU2(SEL) - ALU2(SHL) - ALU2(SHR) - ALU2(XOR) - #undef ALU1 - #undef ALU2 - #undef ALU3 - - gen8_instruction *CMP(struct brw_reg dst, unsigned conditional, - struct brw_reg src0, struct brw_reg src1); - gen8_instruction *IF(unsigned predicate); - gen8_instruction *ELSE(); - gen8_instruction *ENDIF(); - void DO(); - gen8_instruction *BREAK(); - gen8_instruction *CONTINUE(); - gen8_instruction *WHILE(); - - gen8_instruction *HALT(); - - gen8_instruction *MATH(unsigned math_function, - struct brw_reg dst, - struct brw_reg src0); - gen8_instruction *MATH(unsigned math_function, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - gen8_instruction *NOP(); - /** @} */ - -protected: - gen8_instruction *alu3(unsigned opcode, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1, - struct brw_reg src2); - - gen8_instruction *math(unsigned math_function, - struct brw_reg dst, - struct brw_reg src0); - - gen8_instruction *next_inst(unsigned opcode); - - struct gl_shader_program *shader_prog; - struct gl_program *prog; - - struct brw_context *brw; - struct intel_context *intel; - struct gl_context *ctx; - - gen8_instruction *store; - unsigned store_size; - unsigned nr_inst; - unsigned next_inst_offset; - - /** - * Control flow 
stacks: - * - * if_stack contains IF and ELSE instructions which must be patched with - * the final jump offsets (and popped) once the matching ENDIF is encountered. - * - * We actually store an array index into the store, rather than pointers - * to the instructions. This is necessary since we may realloc the store. - * - * @{ - */ - int *if_stack; - int if_stack_depth; - int if_stack_array_size; - - int *loop_stack; - int loop_stack_depth; - int loop_stack_array_size; - - int if_depth_in_loop; - - void push_if_stack(gen8_instruction *inst); - gen8_instruction *pop_if_stack(); - /** @} */ - - void patch_IF_ELSE(gen8_instruction *if_inst, - gen8_instruction *else_inst, - gen8_instruction *endif_inst); - - unsigned next_ip(unsigned ip) const; - unsigned find_next_block_end(unsigned start_ip) const; - unsigned find_loop_end(unsigned start) const; - - void patch_jump_targets(); - - /** - * Default state for new instructions. - */ - struct { - unsigned exec_size; - unsigned access_mode; - unsigned mask_control; - unsigned qtr_control; - unsigned flag_subreg_nr; - unsigned conditional_mod; - unsigned predicate; - bool predicate_inverse; - bool saturate; - } default_state; - - void *mem_ctx; -}; diff --git a/src/mesa/drivers/dri/i965/gen8_instruction.c b/src/mesa/drivers/dri/i965/gen8_instruction.c deleted file mode 100644 index 47955e12722..00000000000 --- a/src/mesa/drivers/dri/i965/gen8_instruction.c +++ /dev/null @@ -1,458 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and 
this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file gen8_instruction.c - * - * A representation of a Gen8+ EU instruction, with helper methods to get - * and set various fields. This is the actual hardware format. - */ - -#include "main/compiler.h" -#include "brw_defines.h" -#include "gen8_instruction.h" - -static void -gen8_convert_mrf_to_grf(struct brw_reg *reg) -{ - /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"): - * "The send with EOT should use register space R112-R127 for <src>. This is - * to enable loading of a new thread into the same slot while the message - * with EOT for current thread is pending dispatch." - * - * Since we're pretending to have 16 MRFs anyway, we may as well use the - * registers required for messages with EOT.
- */ - if (reg->file == BRW_MESSAGE_REGISTER_FILE) { - reg->file = BRW_GENERAL_REGISTER_FILE; - reg->nr += GEN7_MRF_HACK_START; - } -} - -void -gen8_set_dst(const struct brw_context *brw, - struct gen8_instruction *inst, - struct brw_reg reg) -{ - gen8_convert_mrf_to_grf(&reg); - - if (reg.file == BRW_GENERAL_REGISTER_FILE) - assert(reg.nr < BRW_MAX_GRF); - - gen8_set_dst_reg_file(inst, reg.file); - gen8_set_dst_reg_type(inst, brw_reg_type_to_hw_type(brw, reg.type, reg.file)); - gen8_set_dst_address_mode(inst, reg.address_mode); - - if (reg.address_mode == BRW_ADDRESS_DIRECT) { - gen8_set_dst_da_reg_nr(inst, reg.nr); - - if (gen8_access_mode(inst) == BRW_ALIGN_1) { - /* Set Dst.SubRegNum[4:0] */ - gen8_set_dst_da1_subreg_nr(inst, reg.subnr); - - /* Set Dst.HorzStride */ - if (reg.hstride == BRW_HORIZONTAL_STRIDE_0) - reg.hstride = BRW_HORIZONTAL_STRIDE_1; - gen8_set_dst_da1_hstride(inst, reg.hstride); - } else { - /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */ - assert(reg.subnr == 0 || reg.subnr == 16); - gen8_set_dst_da16_subreg_nr(inst, reg.subnr >> 4); - gen8_set_da16_writemask(inst, reg.dw1.bits.writemask); - } - } else { - /* Indirect addressing */ - assert(gen8_access_mode(inst) == BRW_ALIGN_1); - - /* Set Dst.HorzStride */ - if (reg.hstride == BRW_HORIZONTAL_STRIDE_0) - reg.hstride = BRW_HORIZONTAL_STRIDE_1; - gen8_set_dst_da1_hstride(inst, reg.hstride); - gen8_set_dst_ia1_subreg_nr(inst, reg.subnr); - gen8_set_dst_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset); - } - - /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8) - * or 16 (SIMD16), as that's normally correct. However, when dealing with - * small registers, we automatically reduce it to match the register size.
- */ - if (reg.width < BRW_EXECUTE_8) - gen8_set_exec_size(inst, reg.width); -} - -static void -gen8_validate_reg(struct gen8_instruction *inst, struct brw_reg reg) -{ - int hstride_for_reg[] = {0, 1, 2, 4}; - int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; - int width_for_reg[] = {1, 2, 4, 8, 16}; - int execsize_for_reg[] = {1, 2, 4, 8, 16}; - int width, hstride, vstride, execsize; - - if (reg.file == BRW_IMMEDIATE_VALUE) { - /* TODO: check immediate vectors */ - return; - } - - if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE) - return; - - assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg)); - hstride = hstride_for_reg[reg.hstride]; - - if (reg.vstride == 0xf) { - vstride = -1; - } else { - assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg)); - vstride = vstride_for_reg[reg.vstride]; - } - - assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg)); - width = width_for_reg[reg.width]; - - assert(gen8_exec_size(inst) >= 0 && - gen8_exec_size(inst) < ARRAY_SIZE(execsize_for_reg)); - execsize = execsize_for_reg[gen8_exec_size(inst)]; - - /* Restrictions from 3.3.10: Register Region Restrictions. */ - /* 3. */ - assert(execsize >= width); - - /* 4. */ - if (execsize == width && hstride != 0) { - assert(vstride == -1 || vstride == width * hstride); - } - - /* 5. */ - if (execsize == width && hstride == 0) { - /* no restriction on vstride. */ - } - - /* 6. */ - if (width == 1) { - assert(hstride == 0); - } - - /* 7. */ - if (execsize == 1 && width == 1) { - assert(hstride == 0); - assert(vstride == 0); - } - - /* 8. */ - if (vstride == 0 && hstride == 0) { - assert(width == 1); - } - - /* 10. Check destination issues. 
*/ -} - -void -gen8_set_src0(const struct brw_context *brw, - struct gen8_instruction *inst, - struct brw_reg reg) -{ - gen8_convert_mrf_to_grf(&reg); - - if (reg.file == BRW_GENERAL_REGISTER_FILE) - assert(reg.nr < BRW_MAX_GRF); - - gen8_validate_reg(inst, reg); - - gen8_set_src0_reg_file(inst, reg.file); - gen8_set_src0_reg_type(inst, - brw_reg_type_to_hw_type(brw, reg.type, reg.file)); - gen8_set_src0_abs(inst, reg.abs); - gen8_set_src0_negate(inst, reg.negate); - - if (reg.file == BRW_IMMEDIATE_VALUE) { - inst->data[3] = reg.dw1.ud; - - /* Required to set some fields in src1 as well: */ - gen8_set_src1_reg_file(inst, BRW_ARCHITECTURE_REGISTER_FILE); - gen8_set_src1_reg_type(inst, - brw_reg_type_to_hw_type(brw, reg.type, reg.file)); - return; - } - - gen8_set_src0_address_mode(inst, reg.address_mode); - - if (reg.address_mode == BRW_ADDRESS_DIRECT) { - gen8_set_src0_da_reg_nr(inst, reg.nr); - - if (gen8_access_mode(inst) == BRW_ALIGN_1) { - /* Set Src0.SubRegNum[4:0] */ - gen8_set_src0_da1_subreg_nr(inst, reg.subnr); - - if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) { - gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0); - gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0); - } else { - gen8_set_src0_da1_hstride(inst, reg.hstride); - gen8_set_src0_vert_stride(inst, reg.vstride); - } - gen8_set_src0_da1_width(inst, reg.width); - - } else { - /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ).
*/ - assert(reg.subnr == 0 || reg.subnr == 16); - gen8_set_src0_da16_subreg_nr(inst, reg.subnr >> 4); - - gen8_set_src0_da16_swiz_x(inst, - BRW_GET_SWZ(reg.dw1.bits.swizzle, - BRW_CHANNEL_X)); - gen8_set_src0_da16_swiz_y(inst, - BRW_GET_SWZ(reg.dw1.bits.swizzle, - BRW_CHANNEL_Y)); - gen8_set_src0_da16_swiz_z(inst, - BRW_GET_SWZ(reg.dw1.bits.swizzle, - BRW_CHANNEL_Z)); - gen8_set_src0_da16_swiz_w(inst, - BRW_GET_SWZ(reg.dw1.bits.swizzle, - BRW_CHANNEL_W)); - - /* This is an oddity of the fact that we're using the same - * descriptions for registers in both Align16 and Align1 modes. - */ - if (reg.vstride == BRW_VERTICAL_STRIDE_8) - gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_4); - else - gen8_set_src0_vert_stride(inst, reg.vstride); - } - } else { - /* Indirect addressing */ - assert(gen8_access_mode(inst) == BRW_ALIGN_1); - if (reg.width == BRW_WIDTH_1 && - gen8_exec_size(inst) == BRW_EXECUTE_1) { - gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0); - gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0); - } else { - gen8_set_src0_da1_hstride(inst, reg.hstride); - gen8_set_src0_vert_stride(inst, reg.vstride); - } - - gen8_set_src0_da1_width(inst, reg.width); - gen8_set_src0_ia1_subreg_nr(inst, reg.subnr); - gen8_set_src0_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset); - } -} - -void -gen8_set_src1(const struct brw_context *brw, - struct gen8_instruction *inst, - struct brw_reg reg) -{ - gen8_convert_mrf_to_grf(&reg); - - if (reg.file == BRW_GENERAL_REGISTER_FILE) - assert(reg.nr < BRW_MAX_GRF); - - gen8_validate_reg(inst, reg); - - gen8_set_src1_reg_file(inst, reg.file); - gen8_set_src1_reg_type(inst, - brw_reg_type_to_hw_type(brw, reg.type, reg.file)); - gen8_set_src1_abs(inst, reg.abs); - gen8_set_src1_negate(inst, reg.negate); - - /* Only src1 can be an immediate in two-argument instructions.
*/ - assert(gen8_src0_reg_file(inst) != BRW_IMMEDIATE_VALUE); - - if (reg.file == BRW_IMMEDIATE_VALUE) { - inst->data[3] = reg.dw1.ud; - return; - } - - gen8_set_src1_address_mode(inst, reg.address_mode); - - if (reg.address_mode == BRW_ADDRESS_DIRECT) { - gen8_set_src1_da_reg_nr(inst, reg.nr); - - if (gen8_access_mode(inst) == BRW_ALIGN_1) { - /* Set Src0.SubRegNum[4:0] */ - gen8_set_src1_da1_subreg_nr(inst, reg.subnr); - - if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) { - gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0); - gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0); - } else { - gen8_set_src1_da1_hstride(inst, reg.hstride); - gen8_set_src1_vert_stride(inst, reg.vstride); - } - gen8_set_src1_da1_width(inst, reg.width); - } else { - /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */ - assert(reg.subnr == 0 || reg.subnr == 16); - gen8_set_src1_da16_subreg_nr(inst, reg.subnr >> 4); - - gen8_set_src1_da16_swiz_x(inst, - BRW_GET_SWZ(reg.dw1.bits.swizzle, - BRW_CHANNEL_X)); - gen8_set_src1_da16_swiz_y(inst, - BRW_GET_SWZ(reg.dw1.bits.swizzle, - BRW_CHANNEL_Y)); - gen8_set_src1_da16_swiz_z(inst, - BRW_GET_SWZ(reg.dw1.bits.swizzle, - BRW_CHANNEL_Z)); - gen8_set_src1_da16_swiz_w(inst, - BRW_GET_SWZ(reg.dw1.bits.swizzle, - BRW_CHANNEL_W)); - - /* This is an oddity of the fact that we're using the same - * descriptions for registers in both Align16 and Align1 modes. 
- */ - if (reg.vstride == BRW_VERTICAL_STRIDE_8) - gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_4); - else - gen8_set_src1_vert_stride(inst, reg.vstride); - } - } else { - /* Indirect addressing */ - assert(gen8_access_mode(inst) == BRW_ALIGN_1); - if (reg.width == BRW_WIDTH_1 && gen8_exec_size(inst) == BRW_EXECUTE_1) { - gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0); - gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0); - } else { - gen8_set_src1_da1_hstride(inst, reg.hstride); - gen8_set_src1_vert_stride(inst, reg.vstride); - } - - gen8_set_src1_da1_width(inst, reg.width); - gen8_set_src1_ia1_subreg_nr(inst, reg.subnr); - gen8_set_src1_ia1_addr_imm(inst, reg.dw1.bits.indirect_offset); - } -} - -/** - * Set the Message Descriptor and Extended Message Descriptor fields - * for SEND messages. - * - * \note This zeroes out the Function Control bits, so it must be called - * \b before filling out any message-specific data. Callers can - * choose not to fill in irrelevant bits; they will be zero. 
- */ -static void -gen8_set_message_descriptor(const struct brw_context *brw, - struct gen8_instruction *inst, - enum brw_message_target sfid, - unsigned msg_length, - unsigned response_length, - bool header_present, - bool end_of_thread) -{ - gen8_set_src1(brw, inst, brw_imm_d(0)); - - gen8_set_sfid(inst, sfid); - gen8_set_mlen(inst, msg_length); - gen8_set_rlen(inst, response_length); - gen8_set_header_present(inst, header_present); - gen8_set_eot(inst, end_of_thread); -} - -void -gen8_set_urb_message(const struct brw_context *brw, - struct gen8_instruction *inst, - enum brw_urb_write_flags flags, - unsigned msg_length, - unsigned response_length, - unsigned offset, - bool interleave) -{ - gen8_set_message_descriptor(brw, inst, BRW_SFID_URB, - msg_length, response_length, - true, flags & BRW_URB_WRITE_EOT); - gen8_set_src0(brw, inst, brw_vec8_grf(GEN7_MRF_HACK_START + 1, 0)); - if (flags & BRW_URB_WRITE_OWORD) { - assert(msg_length == 2); - gen8_set_urb_opcode(inst, BRW_URB_OPCODE_WRITE_OWORD); - } else { - gen8_set_urb_opcode(inst, BRW_URB_OPCODE_WRITE_HWORD); - } - gen8_set_urb_global_offset(inst, offset); - gen8_set_urb_interleave(inst, interleave); - gen8_set_urb_per_slot_offset(inst, - flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 
1 : 0); -} - -void -gen8_set_sampler_message(const struct brw_context *brw, - struct gen8_instruction *inst, - unsigned binding_table_index, - unsigned sampler, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - bool header_present, - unsigned simd_mode) -{ - gen8_set_message_descriptor(brw, inst, BRW_SFID_SAMPLER, msg_length, - response_length, header_present, false); - - gen8_set_binding_table_index(inst, binding_table_index); - gen8_set_sampler(inst, sampler); - gen8_set_sampler_msg_type(inst, msg_type); - gen8_set_sampler_simd_mode(inst, simd_mode); -} - -void -gen8_set_dp_message(const struct brw_context *brw, - struct gen8_instruction *inst, - enum brw_message_target sfid, - unsigned binding_table_index, - unsigned msg_type, - unsigned msg_control, - unsigned mlen, - unsigned rlen, - bool header_present, - bool end_of_thread) -{ - gen8_set_message_descriptor(brw, inst, sfid, mlen, rlen, header_present, - end_of_thread); - gen8_set_binding_table_index(inst, binding_table_index); - gen8_set_dp_message_type(inst, msg_type); - gen8_set_dp_message_control(inst, msg_control); -} - -void -gen8_set_dp_scratch_message(const struct brw_context *brw, - struct gen8_instruction *inst, - bool write, - bool dword, - bool invalidate_after_read, - unsigned num_regs, - unsigned addr_offset, - unsigned mlen, - unsigned rlen, - bool header_present, - bool end_of_thread) -{ - assert(num_regs == 1 || num_regs == 2 || num_regs == 4 || num_regs == 8); - gen8_set_message_descriptor(brw, inst, GEN7_SFID_DATAPORT_DATA_CACHE, - mlen, rlen, header_present, end_of_thread); - gen8_set_dp_category(inst, 1); /* Scratch Block Read/Write messages */ - gen8_set_scratch_read_write(inst, write); - gen8_set_scratch_type(inst, dword); - gen8_set_scratch_invalidate_after_read(inst, invalidate_after_read); - gen8_set_scratch_block_size(inst, ffs(num_regs) - 1); - gen8_set_scratch_addr_offset(inst, addr_offset); -} diff --git a/src/mesa/drivers/dri/i965/gen8_instruction.h 
b/src/mesa/drivers/dri/i965/gen8_instruction.h deleted file mode 100644 index 94f4195d4da..00000000000 --- a/src/mesa/drivers/dri/i965/gen8_instruction.h +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file gen8_instruction.h - * - * A representation of a Gen8+ EU instruction, with helper methods to get - * and set various fields. This is the actual hardware format. 
- */ - -#ifndef GEN8_INSTRUCTION_H -#define GEN8_INSTRUCTION_H - -#include <stdio.h> -#include <stdint.h> - -#include "brw_context.h" -#include "brw_reg.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct gen8_instruction { - uint32_t data[4]; -}; - -static inline unsigned gen8_instruction_bits(struct gen8_instruction *inst, - unsigned high, - unsigned low); -static inline void gen8_instruction_set_bits(struct gen8_instruction *inst, - unsigned high, - unsigned low, - unsigned value); - -#define F(name, high, low) \ -static inline void gen8_set_##name(struct gen8_instruction *inst, unsigned v) \ -{ \ - gen8_instruction_set_bits(inst, high, low, v); \ -} \ -static inline unsigned gen8_##name(struct gen8_instruction *inst) \ -{ \ - return gen8_instruction_bits(inst, high, low); \ -} - -F(src1_vert_stride, 120, 117) -F(src1_da1_width, 116, 114) -F(src1_da16_swiz_w, 115, 114) -F(src1_da16_swiz_z, 113, 112) -F(src1_da1_hstride, 113, 112) -F(src1_address_mode, 111, 111) -/** Src1.SrcMod @{ */ -F(src1_negate, 110, 110) -F(src1_abs, 109, 109) -/** @} */ -F(src1_ia1_subreg_nr, 108, 105) -F(src1_da_reg_nr, 108, 101) -F(src1_da16_subreg_nr, 100, 100) -F(src1_da1_subreg_nr, 100, 96) -F(src1_da16_swiz_y, 99, 98) -F(src1_da16_swiz_x, 97, 96) -F(src1_reg_type, 94, 91) -F(src1_reg_file, 90, 89) -F(src0_vert_stride, 88, 85) -F(src0_da1_width, 84, 82) -F(src0_da16_swiz_w, 83, 82) -F(src0_da16_swiz_z, 81, 80) -F(src0_da1_hstride, 81, 80) -F(src0_address_mode, 79, 79) -/** Src0.SrcMod @{ */ -F(src0_negate, 78, 78) -F(src0_abs, 77, 77) -/** @} */ -F(src0_ia1_subreg_nr, 76, 73) -F(src0_da_reg_nr, 76, 69) -F(src0_da16_subreg_nr, 68, 68) -F(src0_da1_subreg_nr, 68, 64) -F(src0_da16_swiz_y, 67, 66) -F(src0_da16_swiz_x, 65, 64) -F(dst_address_mode, 63, 63) -F(dst_da1_hstride, 62, 61) -F(dst_ia1_subreg_nr, 60, 57) -F(dst_da_reg_nr, 60, 53) -F(dst_da16_subreg_nr, 52, 52) -F(dst_da1_subreg_nr, 52, 48) -F(da16_writemask, 51, 48) /* Dst.ChanEn */ -F(src0_reg_type, 46, 43) -F(src0_reg_file, 42, 41)
-F(dst_reg_type, 40, 37) -F(dst_reg_file, 36, 35) -F(mask_control, 34, 34) -F(flag_reg_nr, 33, 33) -F(flag_subreg_nr, 32, 32) -F(saturate, 31, 31) -F(branch_control, 30, 30) -F(debug_control, 30, 30) -F(cmpt_control, 29, 29) -F(acc_wr_control, 28, 28) -F(cond_modifier, 27, 24) -F(exec_size, 23, 21) -F(pred_inv, 20, 20) -F(pred_control, 19, 16) -F(thread_control, 15, 14) -F(qtr_control, 13, 12) -F(nib_control, 11, 11) -F(no_dd_check, 10, 10) -F(no_dd_clear, 9, 9) -F(access_mode, 8, 8) -/* Bit 7 is Reserved (for future Opcode expansion) */ -F(opcode, 6, 0) - -/** - * Three-source instructions: - * @{ - */ -F(src2_3src_reg_nr, 125, 118) -F(src2_3src_subreg_nr, 117, 115) -F(src2_3src_swizzle, 114, 107) -F(src2_3src_rep_ctrl, 106, 106) -F(src1_3src_reg_nr, 104, 97) -/* src1_3src_subreg_nr spans word boundaries and has to be handled specially */ -F(src1_3src_swizzle, 93, 86) -F(src1_3src_rep_ctrl, 85, 85) -F(src0_3src_reg_nr, 83, 76) -F(src0_3src_subreg_nr, 75, 73) -F(src0_3src_swizzle, 72, 65) -F(src0_3src_rep_ctrl, 64, 64) -F(dst_3src_reg_nr, 63, 56) -F(dst_3src_subreg_nr, 55, 53) -F(dst_3src_writemask, 52, 49) -F(dst_3src_type, 48, 46) -F(src_3src_type, 45, 43) -F(src2_3src_negate, 42, 42) -F(src2_3src_abs, 41, 41) -F(src1_3src_negate, 40, 40) -F(src1_3src_abs, 39, 39) -F(src0_3src_negate, 38, 38) -F(src0_3src_abs, 37, 37) -/** @} */ - -/** - * Fields for SEND messages: - * @{ - */ -F(eot, 127, 127) -F(mlen, 124, 121) -F(rlen, 120, 116) -F(header_present, 115, 115) -F(function_control, 114, 96) -F(sfid, 27, 24) -F(math_function, 27, 24) -/** @} */ - -/** - * URB message function control bits: - * @{ - */ -F(urb_per_slot_offset, 113, 113) -F(urb_interleave, 111, 111) -F(urb_global_offset, 110, 100) -F(urb_opcode, 99, 96) -/** @} */ - -/* Message descriptor bits */ -#define MD(name, high, low) F(name, (high + 96), (low + 96)) - -/** - * Sampler message function control bits: - * @{ - */ -MD(sampler_simd_mode, 18, 17) -MD(sampler_msg_type, 16, 12) -MD(sampler, 11, 8) 
-MD(binding_table_index, 7, 0) /* also used by other messages */ -/** @} */ - -/** - * Data port message function control bits: - * @{ - */ -MD(dp_category, 18, 18) -MD(dp_message_type, 17, 14) -MD(dp_message_control, 13, 8) -/** @} */ - -/** - * Scratch message bits: - * @{ - */ -MD(scratch_read_write, 17, 17) /* 0 = read, 1 = write */ -MD(scratch_type, 16, 16) /* 0 = OWord, 1 = DWord */ -MD(scratch_invalidate_after_read, 15, 15) -MD(scratch_block_size, 13, 12) -MD(scratch_addr_offset, 11, 0) -/** @} */ - -/** - * Render Target message function control bits: - * @{ - */ -MD(rt_last, 12, 12) -MD(rt_slot_group, 11, 11) -MD(rt_message_type, 10, 8) -/** @} */ - -/** - * Thread Spawn message function control bits: - * @{ - */ -MD(ts_resource_select, 4, 4) -MD(ts_request_type, 1, 1) -MD(ts_opcode, 0, 0) -/** @} */ - -/** - * Video Motion Estimation message function control bits: - * @{ - */ -F(vme_message_type, 14, 13) -/** @} */ - -/** - * Check & Refinement Engine message function control bits: - * @{ - */ -F(cre_message_type, 14, 13) -/** @} */ - -#undef MD -#undef F - -static inline void -gen8_set_src1_3src_subreg_nr(struct gen8_instruction *inst, unsigned v) -{ - assert((v & ~0x7) == 0); - - gen8_instruction_set_bits(inst, 95, 94, v & 0x3); - gen8_instruction_set_bits(inst, 96, 96, v >> 2); -} - -static inline unsigned -gen8_src1_3src_subreg_nr(struct gen8_instruction *inst) -{ - return gen8_instruction_bits(inst, 95, 94) | - (gen8_instruction_bits(inst, 96, 96) << 2); -} - -#define GEN8_IA1_ADDR_IMM(reg, nine, high, low) \ -static inline void \ -gen8_set_##reg##_ia1_addr_imm(struct gen8_instruction *inst, unsigned value) \ -{ \ - assert((value & ~0x3ff) == 0); \ - gen8_instruction_set_bits(inst, high, low, value & 0x1ff); \ - gen8_instruction_set_bits(inst, nine, nine, value >> 9); \ -} \ - \ -static inline unsigned \ -gen8_##reg##_ia1_addr_imm(struct gen8_instruction *inst) \ -{ \ - return gen8_instruction_bits(inst, high, low) | \ - (gen8_instruction_bits(inst, 
nine, nine) << 9); \ -} - -/* AddrImm[9:0] for Align1 Indirect Addressing */ -GEN8_IA1_ADDR_IMM(src1, 121, 104, 96) -GEN8_IA1_ADDR_IMM(src0, 95, 72, 64) -GEN8_IA1_ADDR_IMM(dst, 47, 56, 48) - -/** - * Flow control instruction bits: - * @{ - */ -static inline unsigned gen8_uip(struct gen8_instruction *inst) -{ - return inst->data[2]; -} -static inline void gen8_set_uip(struct gen8_instruction *inst, unsigned uip) -{ - inst->data[2] = uip; -} -static inline unsigned gen8_jip(struct gen8_instruction *inst) -{ - return inst->data[3]; -} -static inline void gen8_set_jip(struct gen8_instruction *inst, unsigned jip) -{ - inst->data[3] = jip; -} -/** @} */ - -static inline int gen8_src1_imm_d(struct gen8_instruction *inst) -{ - return inst->data[3]; -} -static inline unsigned gen8_src1_imm_ud(struct gen8_instruction *inst) -{ - return inst->data[3]; -} -static inline float gen8_src1_imm_f(struct gen8_instruction *inst) -{ - fi_type ft; - - ft.u = inst->data[3]; - return ft.f; -} - -void gen8_set_dst(const struct brw_context *brw, - struct gen8_instruction *inst, struct brw_reg reg); -void gen8_set_src0(const struct brw_context *brw, - struct gen8_instruction *inst, struct brw_reg reg); -void gen8_set_src1(const struct brw_context *brw, - struct gen8_instruction *inst, struct brw_reg reg); - -void gen8_set_urb_message(const struct brw_context *brw, - struct gen8_instruction *inst, - enum brw_urb_write_flags flags, - unsigned mlen, unsigned rlen, - unsigned offset, bool interleave); - -void gen8_set_sampler_message(const struct brw_context *brw, - struct gen8_instruction *inst, - unsigned binding_table_index, unsigned sampler, - unsigned msg_type, unsigned rlen, unsigned mlen, - bool header_present, unsigned simd_mode); - -void gen8_set_dp_message(const struct brw_context *brw, - struct gen8_instruction *inst, - enum brw_message_target sfid, - unsigned binding_table_index, - unsigned msg_type, - unsigned msg_control, - unsigned msg_length, - unsigned response_length, - bool 
header_present, - bool end_of_thread); - -void gen8_set_dp_scratch_message(const struct brw_context *brw, - struct gen8_instruction *inst, - bool write, - bool dword, - bool invalidate_after_read, - unsigned num_regs, - unsigned addr_offset, - unsigned msg_length, - unsigned response_length, - bool header_present, - bool end_of_thread); - -/** - * Fetch a set of contiguous bits from the instruction. - * - * Bits indexes range from 0..127; fields may not cross 32-bit boundaries. - */ -static inline unsigned -gen8_instruction_bits(struct gen8_instruction *inst, unsigned high, unsigned low) -{ - /* We assume the field doesn't cross 32-bit boundaries. */ - const unsigned word = high / 32; - assert(word == low / 32); - - high %= 32; - low %= 32; - - const unsigned mask = (((1 << (high - low + 1)) - 1) << low); - - return (inst->data[word] & mask) >> low; -} - -/** - * Set bits in the instruction, with proper shifting and masking. - * - * Bits indexes range from 0..127; fields may not cross 32-bit boundaries. - */ -static inline void -gen8_instruction_set_bits(struct gen8_instruction *inst, - unsigned high, - unsigned low, - unsigned value) -{ - const unsigned word = high / 32; - assert(word == low / 32); - - high %= 32; - low %= 32; - - const unsigned mask = (((1 << (high - low + 1)) - 1) << low); - - /* Make sure the supplied value actually fits in the given bitfield. 
*/ - assert((value & (mask >> low)) == value); - - inst->data[word] = (inst->data[word] & ~mask) | ((value << low) & mask); -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp b/src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp deleted file mode 100644 index 3a81cf24267..00000000000 --- a/src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp +++ /dev/null @@ -1,925 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "brw_vec4.h" -#include "brw_cfg.h" - -extern "C" { -#include "brw_eu.h" -#include "main/macros.h" -#include "program/prog_print.h" -#include "program/prog_parameter.h" -}; - -namespace brw { - -gen8_vec4_generator::gen8_vec4_generator(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_vec4_prog_data *prog_data, - void *mem_ctx, - bool debug_flag) - : gen8_generator(brw, shader_prog, prog, mem_ctx), - prog_data(prog_data), - debug_flag(debug_flag) -{ -} - -gen8_vec4_generator::~gen8_vec4_generator() -{ -} - -void -gen8_vec4_generator::generate_tex(vec4_instruction *ir, struct brw_reg dst, - struct brw_reg sampler_index) -{ - int msg_type = 0; - - switch (ir->opcode) { - case SHADER_OPCODE_TEX: - case SHADER_OPCODE_TXL: - if (ir->shadow_compare) { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; - } - break; - case SHADER_OPCODE_TXD: - if (ir->shadow_compare) { - msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; - } - break; - case SHADER_OPCODE_TXF: - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; - break; - case SHADER_OPCODE_TXF_CMS: - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; - break; - case SHADER_OPCODE_TXF_MCS: - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; - break; - case SHADER_OPCODE_TXS: - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; - break; - case SHADER_OPCODE_TG4: - if (ir->shadow_compare) { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; - } else { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; - } - break; - case SHADER_OPCODE_TG4_OFFSET: - if (ir->shadow_compare) { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; - } else { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; - } - break; - default: - unreachable("should not get here: invalid VS texture opcode"); - } - - assert(sampler_index.file == BRW_IMMEDIATE_VALUE); - 
assert(sampler_index.type == BRW_REGISTER_TYPE_UD); - - uint32_t sampler = sampler_index.dw1.ud; - - if (ir->header_present) { - MOV_RAW(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD), - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - - default_state.access_mode = BRW_ALIGN_1; - - if (ir->texture_offset) { - /* Set the offset bits in DWord 2. */ - MOV_RAW(retype(brw_vec1_reg(MRF, ir->base_mrf, 2), - BRW_REGISTER_TYPE_UD), - brw_imm_ud(ir->texture_offset)); - } - - if (sampler >= 16) { - /* The "Sampler Index" field can only store values between 0 and 15. - * However, we can add an offset to the "Sampler State Pointer" - * field, effectively selecting a different set of 16 samplers. - * - * The "Sampler State Pointer" needs to be aligned to a 32-byte - * offset, and each sampler state is only 16-bytes, so we can't - * exclusively use the offset - we have to use both. - */ - const int sampler_state_size = 16; /* 16 bytes */ - gen8_instruction *add = - ADD(get_element_ud(brw_message_reg(ir->base_mrf), 3), - get_element_ud(brw_vec8_grf(0, 0), 3), - brw_imm_ud(16 * (sampler / 16) * sampler_state_size)); - gen8_set_mask_control(add, BRW_MASK_DISABLE); - } - - default_state.access_mode = BRW_ALIGN_16; - } - - uint32_t surf_index = - prog_data->base.binding_table.texture_start + sampler; - - gen8_instruction *inst = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, inst, dst); - gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf)); - gen8_set_sampler_message(brw, inst, - surf_index, - sampler % 16, - msg_type, - 1, - ir->mlen, - ir->header_present, - BRW_SAMPLER_SIMD_MODE_SIMD4X2); - - brw_mark_surface_used(&prog_data->base, surf_index); -} - -void -gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs) -{ - struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0); - - /* Copy g0. 
*/ - if (vs) - MOV_RAW(header, brw_vec8_grf(0, 0)); - - gen8_instruction *inst; - if (!(ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) { - /* Enable Channel Masks in the URB_WRITE_OWORD message header */ - default_state.access_mode = BRW_ALIGN_1; - MOV_RAW(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5), - brw_imm_ud(0xff00)); - default_state.access_mode = BRW_ALIGN_16; - } - - inst = next_inst(BRW_OPCODE_SEND); - gen8_set_urb_message(brw, inst, ir->urb_write_flags, ir->mlen, 0, ir->offset, - true); - gen8_set_dst(brw, inst, brw_null_reg()); - gen8_set_src0(brw, inst, header); -} - -void -gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header, - struct brw_reg src) -{ - /* Move the vertex count into the second MRF for the EOT write. */ - assert(eot_mrf_header.file == BRW_MESSAGE_REGISTER_FILE); - int dst_nr = GEN7_MRF_HACK_START + eot_mrf_header.nr + 1; - gen8_instruction *inst = - MOV(retype(brw_vec8_grf(dst_nr, 0), BRW_REGISTER_TYPE_UD), src); - gen8_set_mask_control(inst, BRW_MASK_DISABLE); -} - -void -gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir) -{ - struct brw_reg src = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0); - gen8_instruction *inst; - - /* Enable Channel Masks in the URB_WRITE_HWORD message header */ - default_state.access_mode = BRW_ALIGN_1; - inst = MOV(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5), - BRW_REGISTER_TYPE_UD), - brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */ - gen8_set_mask_control(inst, BRW_MASK_DISABLE); - default_state.access_mode = BRW_ALIGN_16; - - /* mlen = 2: g0 header + vertex count */ - inst = next_inst(BRW_OPCODE_SEND); - gen8_set_urb_message(brw, inst, BRW_URB_WRITE_EOT, 2, 0, 0, true); - gen8_set_dst(brw, inst, brw_null_reg()); - gen8_set_src0(brw, inst, src); -} - -void -gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) -{ - /* From p22 of volume 4 part 2 of the 
Ivy Bridge PRM (2.4.3.1 Message - * Header: M0.3): - * - * Slot 0 Offset. This field, after adding to the Global Offset field - * in the message descriptor, specifies the offset (in 256-bit units) - * from the start of the URB entry, as referenced by URB Handle 0, at - * which the data will be accessed. - * - * Similar text describes DWORD M0.4, which is slot 1 offset. - * - * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components - * of the register for geometry shader invocations 0 and 1) by the - * immediate value in src1, and store the result in DWORDs 3 and 4 of dst. - * - * We can do this with the following EU instruction: - * - * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all } - */ - default_state.access_mode = BRW_ALIGN_1; - gen8_instruction *inst = - MUL(suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), src1); - gen8_set_mask_control(inst, BRW_MASK_DISABLE); - default_state.access_mode = BRW_ALIGN_16; -} - -void -gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst, - struct brw_reg src) -{ - assert(src.file == BRW_IMMEDIATE_VALUE); - - default_state.access_mode = BRW_ALIGN_1; - - gen8_instruction *inst = MOV(suboffset(vec1(dst), 2), src); - gen8_set_mask_control(inst, BRW_MASK_DISABLE); - - default_state.access_mode = BRW_ALIGN_16; -} - -void -gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst) -{ - /* We want to left shift just DWORD 4 (the x component belonging to the - * second geometry shader invocation) by 4 bits. 
So generate the - * instruction: - * - * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all } - */ - dst = suboffset(vec1(dst), 4); - default_state.access_mode = BRW_ALIGN_1; - gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4)); - gen8_set_mask_control(inst, BRW_MASK_DISABLE); - default_state.access_mode = BRW_ALIGN_16; -} - -void -gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst, - struct brw_reg src) -{ - /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message - * Header: M0.5): - * - * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask - * - * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1 - * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls - * Vertex 0 DATA[7]. This bit is ANDed with the corresponding - * channel enable to determine the final channel enable. For the - * URB_READ_OWORD & URB_READ_HWORD messages, when final channel - * enable is 1 it indicates that Vertex 1 DATA [3] will be included - * in the writeback message. For the URB_WRITE_OWORD & - * URB_WRITE_HWORD messages, when final channel enable is 1 it - * indicates that Vertex 1 DATA [3] will be written to the surface. - * - * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included - * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included - * - * 14 Vertex 1 DATA [2] Channel Mask - * 13 Vertex 1 DATA [1] Channel Mask - * 12 Vertex 1 DATA [0] Channel Mask - * 11 Vertex 0 DATA [3] Channel Mask - * 10 Vertex 0 DATA [2] Channel Mask - * 9 Vertex 0 DATA [1] Channel Mask - * 8 Vertex 0 DATA [0] Channel Mask - * - * (This is from a section of the PRM that is agnostic to the particular - * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to - * geometry shader invocations 0 and 1, respectively). 
Since we have the - * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0, - * and the enable flags for geometry shader invocation 1 in bits 7:0 of - * DWORD 4, we just need to OR them together and store the result in bits - * 15:8 of DWORD 5. - * - * It's easier to get the EU to do this if we think of the src and dst - * registers as composed of 32 bytes each; then, we want to pick up the - * contents of bytes 0 and 16 from src, OR them together, and store them in - * byte 21. - * - * We can do that by the following EU instruction: - * - * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all } - * - * Note: this relies on the source register having zeros in (a) bits 7:4 of - * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the - * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which - * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to - * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to - * contain valid channel mask values (which are in the range 0x0-0xf). - */ - dst = retype(dst, BRW_REGISTER_TYPE_UB); - src = retype(src, BRW_REGISTER_TYPE_UB); - - default_state.access_mode = BRW_ALIGN_1; - - gen8_instruction *inst = - OR(suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16)); - gen8_set_mask_control(inst, BRW_MASK_DISABLE); - - default_state.access_mode = BRW_ALIGN_16; -} - -void -gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, - struct brw_reg index) -{ - int second_vertex_offset = 1; - - m1 = retype(m1, BRW_REGISTER_TYPE_D); - - /* Set up M1 (message payload). Only the block offsets in M1.0 and - * M1.4 are used, and the rest are ignored. 
- */ - struct brw_reg m1_0 = suboffset(vec1(m1), 0); - struct brw_reg m1_4 = suboffset(vec1(m1), 4); - struct brw_reg index_0 = suboffset(vec1(index), 0); - struct brw_reg index_4 = suboffset(vec1(index), 4); - - default_state.mask_control = BRW_MASK_DISABLE; - default_state.access_mode = BRW_ALIGN_1; - - MOV(m1_0, index_0); - - if (index.file == BRW_IMMEDIATE_VALUE) { - index_4.dw1.ud += second_vertex_offset; - MOV(m1_4, index_4); - } else { - ADD(m1_4, index_4, brw_imm_d(second_vertex_offset)); - } - - default_state.mask_control = BRW_MASK_ENABLE; - default_state.access_mode = BRW_ALIGN_16; -} - -void -gen8_vec4_generator::generate_scratch_read(vec4_instruction *ir, - struct brw_reg dst, - struct brw_reg index) -{ - struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0); - - MOV_RAW(header, brw_vec8_grf(0, 0)); - - generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index); - - /* Each of the 8 channel enables is considered for whether each - * dword is written. - */ - gen8_instruction *send = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, send, dst); - gen8_set_src0(brw, send, header); - gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE, - 255, /* binding table index: stateless access */ - GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ, - BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, - 2, /* mlen */ - 1, /* rlen */ - true, /* header present */ - false); /* EOT */ -} - -void -gen8_vec4_generator::generate_scratch_write(vec4_instruction *ir, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg index) -{ - struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0); - - MOV_RAW(header, brw_vec8_grf(0, 0)); - - generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index); - - MOV(retype(brw_message_reg(ir->base_mrf + 2), BRW_REGISTER_TYPE_D), - retype(src, BRW_REGISTER_TYPE_D)); - - /* Each of the 8 channel enables is considered for whether each - * dword is written. 
- */ - gen8_instruction *send = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, send, dst); - gen8_set_src0(brw, send, header); - gen8_set_pred_control(send, ir->predicate); - gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE, - 255, /* binding table index: stateless access */ - GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE, - BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, - 3, /* mlen */ - 0, /* rlen */ - true, /* header present */ - false); /* EOT */ -} - -void -gen8_vec4_generator::generate_pull_constant_load(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) -{ - assert(index.file == BRW_IMMEDIATE_VALUE && - index.type == BRW_REGISTER_TYPE_UD); - uint32_t surf_index = index.dw1.ud; - - assert(offset.file == BRW_GENERAL_REGISTER_FILE); - - /* Each of the 8 channel enables is considered for whether each - * dword is written. - */ - gen8_instruction *send = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, send, dst); - gen8_set_src0(brw, send, offset); - gen8_set_sampler_message(brw, send, - surf_index, - 0, /* The LD message ignores the sampler unit. 
*/ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - 1, /* rlen */ - 1, /* mlen */ - false, /* no header */ - BRW_SAMPLER_SIMD_MODE_SIMD4X2); - - brw_mark_surface_used(&prog_data->base, surf_index); -} - -void -gen8_vec4_generator::generate_untyped_atomic(vec4_instruction *ir, - struct brw_reg dst, - struct brw_reg atomic_op, - struct brw_reg surf_index) -{ - assert(atomic_op.file == BRW_IMMEDIATE_VALUE && - atomic_op.type == BRW_REGISTER_TYPE_UD && - surf_index.file == BRW_IMMEDIATE_VALUE && - surf_index.type == BRW_REGISTER_TYPE_UD); - assert((atomic_op.dw1.ud & ~0xf) == 0); - - unsigned msg_control = - atomic_op.dw1.ud | /* Atomic Operation Type: BRW_AOP_* */ - (1 << 5); /* Return data expected */ - - gen8_instruction *inst = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD)); - gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf), - BRW_REGISTER_TYPE_UD)); - gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1, - surf_index.dw1.ud, - HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2, - msg_control, - ir->mlen, - 1, - ir->header_present, - false); - - brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); -} - - - -void -gen8_vec4_generator::generate_untyped_surface_read(vec4_instruction *ir, - struct brw_reg dst, - struct brw_reg surf_index) -{ - assert(surf_index.file == BRW_IMMEDIATE_VALUE && - surf_index.type == BRW_REGISTER_TYPE_UD); - - gen8_instruction *inst = next_inst(BRW_OPCODE_SEND); - gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD)); - gen8_set_src0(brw, inst, retype(brw_message_reg(ir->base_mrf), - BRW_REGISTER_TYPE_UD)); - gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1, - surf_index.dw1.ud, - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ, - 0xe, /* enable only the R channel */ - ir->mlen, - 1, - ir->header_present, - false); - - brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); -} - - -void -gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, - 
struct brw_reg dst, - struct brw_reg *src) -{ - vec4_instruction *ir = (vec4_instruction *) instruction; - - if (dst.width == BRW_WIDTH_4) { - /* This happens in attribute fixups for "dual instanced" geometry - * shaders, since they use attributes that are vec4's. Since the exec - * width is only 4, it's essential that the caller set - * force_writemask_all in order to make sure the instruction is executed - * regardless of which channels are enabled. - */ - assert(ir->force_writemask_all); - - /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy - * the following register region restrictions (from Graphics BSpec: - * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions - * > Register Region Restrictions) - * - * 1. ExecSize must be greater than or equal to Width. - * - * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set - * to Width * HorzStride." - */ - for (int i = 0; i < 3; i++) { - if (src[i].file == BRW_GENERAL_REGISTER_FILE) - src[i] = stride(src[i], 4, 4, 1); - } - } - - switch (ir->opcode) { - case BRW_OPCODE_MOV: - MOV(dst, src[0]); - break; - - case BRW_OPCODE_ADD: - ADD(dst, src[0], src[1]); - break; - - case BRW_OPCODE_MUL: - MUL(dst, src[0], src[1]); - break; - - case BRW_OPCODE_MACH: - MACH(dst, src[0], src[1]); - break; - - case BRW_OPCODE_MAD: - MAD(dst, src[0], src[1], src[2]); - break; - - case BRW_OPCODE_FRC: - FRC(dst, src[0]); - break; - - case BRW_OPCODE_RNDD: - RNDD(dst, src[0]); - break; - - case BRW_OPCODE_RNDE: - RNDE(dst, src[0]); - break; - - case BRW_OPCODE_RNDZ: - RNDZ(dst, src[0]); - break; - - case BRW_OPCODE_AND: - AND(dst, src[0], src[1]); - break; - - case BRW_OPCODE_OR: - OR(dst, src[0], src[1]); - break; - - case BRW_OPCODE_XOR: - XOR(dst, src[0], src[1]); - break; - - case BRW_OPCODE_NOT: - NOT(dst, src[0]); - break; - - case BRW_OPCODE_ASR: - ASR(dst, src[0], src[1]); - break; - - case BRW_OPCODE_SHR: - SHR(dst, src[0], src[1]); - break; - - case BRW_OPCODE_SHL: - SHL(dst, 
src[0], src[1]); - break; - - case BRW_OPCODE_CMP: - CMP(dst, ir->conditional_mod, src[0], src[1]); - break; - - case BRW_OPCODE_SEL: - SEL(dst, src[0], src[1]); - break; - - case BRW_OPCODE_DPH: - DPH(dst, src[0], src[1]); - break; - - case BRW_OPCODE_DP4: - DP4(dst, src[0], src[1]); - break; - - case BRW_OPCODE_DP3: - DP3(dst, src[0], src[1]); - break; - - case BRW_OPCODE_DP2: - DP2(dst, src[0], src[1]); - break; - - case BRW_OPCODE_F32TO16: - /* Emulate the Gen7 zeroing bug. */ - MOV(retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)); - MOV(retype(dst, BRW_REGISTER_TYPE_HF), src[0]); - break; - - case BRW_OPCODE_F16TO32: - MOV(dst, retype(src[0], BRW_REGISTER_TYPE_HF)); - break; - - case BRW_OPCODE_LRP: - LRP(dst, src[0], src[1], src[2]); - break; - - case BRW_OPCODE_BFREV: - /* BFREV only supports UD type for src and dst. */ - BFREV(retype(dst, BRW_REGISTER_TYPE_UD), - retype(src[0], BRW_REGISTER_TYPE_UD)); - break; - - case BRW_OPCODE_FBH: - /* FBH only supports UD type for dst. */ - FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]); - break; - - case BRW_OPCODE_FBL: - /* FBL only supports UD type for dst. */ - FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]); - break; - - case BRW_OPCODE_CBIT: - /* CBIT only supports UD type for dst. 
*/ - CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]); - break; - - case BRW_OPCODE_ADDC: - ADDC(dst, src[0], src[1]); - break; - - case BRW_OPCODE_SUBB: - SUBB(dst, src[0], src[1]); - break; - - case BRW_OPCODE_BFE: - BFE(dst, src[0], src[1], src[2]); - break; - - case BRW_OPCODE_BFI1: - BFI1(dst, src[0], src[1]); - break; - - case BRW_OPCODE_BFI2: - BFI2(dst, src[0], src[1], src[2]); - break; - - case BRW_OPCODE_IF: - IF(ir->predicate); - break; - - case BRW_OPCODE_ELSE: - ELSE(); - break; - - case BRW_OPCODE_ENDIF: - ENDIF(); - break; - - case BRW_OPCODE_DO: - DO(); - break; - - case BRW_OPCODE_BREAK: - BREAK(); - break; - - case BRW_OPCODE_CONTINUE: - CONTINUE(); - break; - - case BRW_OPCODE_WHILE: - WHILE(); - break; - - case SHADER_OPCODE_RCP: - MATH(BRW_MATH_FUNCTION_INV, dst, src[0]); - break; - - case SHADER_OPCODE_RSQ: - MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]); - break; - - case SHADER_OPCODE_SQRT: - MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]); - break; - - case SHADER_OPCODE_EXP2: - MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]); - break; - - case SHADER_OPCODE_LOG2: - MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]); - break; - - case SHADER_OPCODE_SIN: - MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]); - break; - - case SHADER_OPCODE_COS: - MATH(BRW_MATH_FUNCTION_COS, dst, src[0]); - break; - - case SHADER_OPCODE_POW: - MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]); - break; - - case SHADER_OPCODE_INT_QUOTIENT: - MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]); - break; - - case SHADER_OPCODE_INT_REMAINDER: - MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]); - break; - - case SHADER_OPCODE_TEX: - case SHADER_OPCODE_TXD: - case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_CMS: - case SHADER_OPCODE_TXF_MCS: - case SHADER_OPCODE_TXL: - case SHADER_OPCODE_TXS: - case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: - /* note: src[0] is unused. 
*/ - generate_tex(ir, dst, src[1]); - break; - - case VS_OPCODE_URB_WRITE: - generate_urb_write(ir, true); - break; - - case SHADER_OPCODE_GEN4_SCRATCH_READ: - generate_scratch_read(ir, dst, src[0]); - break; - - case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - generate_scratch_write(ir, dst, src[0], src[1]); - break; - - case VS_OPCODE_PULL_CONSTANT_LOAD: - case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: - generate_pull_constant_load(ir, dst, src[0], src[1]); - break; - - case GS_OPCODE_URB_WRITE: - generate_urb_write(ir, false); - break; - - case GS_OPCODE_THREAD_END: - generate_gs_thread_end(ir); - break; - - case GS_OPCODE_SET_WRITE_OFFSET: - generate_gs_set_write_offset(dst, src[0], src[1]); - break; - - case GS_OPCODE_SET_VERTEX_COUNT: - generate_gs_set_vertex_count(dst, src[0]); - break; - - case GS_OPCODE_SET_DWORD_2_IMMED: - generate_gs_set_dword_2_immed(dst, src[0]); - break; - - case GS_OPCODE_PREPARE_CHANNEL_MASKS: - generate_gs_prepare_channel_masks(dst); - break; - - case GS_OPCODE_SET_CHANNEL_MASKS: - generate_gs_set_channel_masks(dst, src[0]); - break; - - case SHADER_OPCODE_SHADER_TIME_ADD: - unreachable("XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time"); - - case SHADER_OPCODE_UNTYPED_ATOMIC: - generate_untyped_atomic(ir, dst, src[0], src[1]); - break; - - case SHADER_OPCODE_UNTYPED_SURFACE_READ: - generate_untyped_surface_read(ir, dst, src[0]); - break; - - case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: - unreachable("VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+."); - - default: - if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) { - _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n", - opcode_descs[ir->opcode].name); - } else { - _mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode); - } - abort(); - } -} - -void -gen8_vec4_generator::generate_code(exec_list *instructions) -{ - struct annotation_info annotation; - memset(&annotation, 0, sizeof(annotation)); - - cfg_t *cfg = NULL; - if (unlikely(debug_flag)) - cfg = new(mem_ctx) 
cfg_t(instructions); - - foreach_in_list(vec4_instruction, ir, instructions) { - struct brw_reg src[3], dst; - - if (unlikely(debug_flag)) - annotate(brw, &annotation, cfg, ir, next_inst_offset); - - for (unsigned int i = 0; i < 3; i++) { - src[i] = ir->get_src(prog_data, i); - } - dst = ir->get_dst(); - - default_state.conditional_mod = ir->conditional_mod; - default_state.predicate = ir->predicate; - default_state.predicate_inverse = ir->predicate_inverse; - default_state.saturate = ir->saturate; - default_state.mask_control = ir->force_writemask_all; - - const unsigned pre_emit_nr_inst = nr_inst; - - generate_vec4_instruction(ir, dst, src); - - if (ir->no_dd_clear || ir->no_dd_check) { - assert(nr_inst == pre_emit_nr_inst + 1 || - !"no_dd_check or no_dd_clear set for IR emitting more " - "than 1 instruction"); - - gen8_instruction *last = &store[pre_emit_nr_inst]; - gen8_set_no_dd_clear(last, ir->no_dd_clear); - gen8_set_no_dd_check(last, ir->no_dd_check); - } - } - - patch_jump_targets(); - annotation_finalize(&annotation, next_inst_offset); - - int before_size = next_inst_offset; - - if (unlikely(debug_flag)) { - if (shader_prog) { - fprintf(stderr, "Native code for %s vertex shader %d:\n", - shader_prog->Label ? shader_prog->Label : "unnamed", - shader_prog->Name); - } else { - fprintf(stderr, "Native code for vertex program %d:\n", prog->Id); - } - fprintf(stderr, "vec4 shader: %d instructions.\n", before_size / 16); - - dump_assembly(store, annotation.ann_count, annotation.ann, brw, prog); - ralloc_free(annotation.ann); - } -} - -const unsigned * -gen8_vec4_generator::generate_assembly(exec_list *instructions, - unsigned *assembly_size) -{ - default_state.access_mode = BRW_ALIGN_16; - default_state.exec_size = BRW_EXECUTE_8; - generate_code(instructions); - - *assembly_size = next_inst_offset; - return (const unsigned *) store; -} - -} /* namespace brw */ -- cgit v1.2.3