From 8babaa84e8141fe4ad248ab68b51e27a6025d7d9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 15 Nov 2018 15:17:06 -0600 Subject: intel/eu: Add support for the SENDS[C] messages Reviewed-by: Iago Toral Quiroga --- src/intel/compiler/brw_eu.h | 11 +++ src/intel/compiler/brw_eu_emit.c | 141 +++++++++++++++++++++++++++++++++-- src/intel/compiler/brw_eu_validate.c | 65 +++++++++++++++- src/intel/compiler/brw_inst.h | 57 +++++++++++--- 4 files changed, 255 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 2c4ccaecb94..a7041ea4a34 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -811,6 +811,17 @@ brw_send_indirect_message(struct brw_codegen *p, struct brw_reg desc, unsigned desc_imm); +void +brw_send_indirect_split_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg payload0, + struct brw_reg payload1, + struct brw_reg desc, + unsigned desc_imm, + struct brw_reg ex_desc, + unsigned ex_desc_imm); + void brw_ff_sync(struct brw_codegen *p, struct brw_reg dest, unsigned msg_reg_nr, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 06084dcdc50..9be82d1b87c 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -96,7 +96,19 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) gen7_convert_mrf_to_grf(p, &dest); - { + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); + assert(dest.subnr % 16 == 0); + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 && + dest.vstride == dest.width + 1); + assert(!dest.negate && !dest.abs); + brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, 
dest.subnr / 16); + brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file); + } else { brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); @@ -177,8 +189,11 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) gen7_convert_mrf_to_grf(p, &reg); - if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + if (devinfo->gen >= 6 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) { /* Any source modifiers or regions will be ignored, since this just * identifies the MRF/GRF to start reading the message contents from. * Check for some likely failures. @@ -188,7 +203,17 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) assert(reg.address_mode == BRW_ADDRESS_DIRECT); } - { + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr % 16 == 0); + assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1); + assert(!reg.negate && !reg.abs); + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } else { brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); brw_inst_set_src0_abs(devinfo, inst, reg.abs); brw_inst_set_src0_negate(devinfo, inst, reg.negate); @@ -282,7 +307,18 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); - { + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + 
assert(reg.file == BRW_GENERAL_REGISTER_FILE || + reg.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr == 0); + assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1); + assert(!reg.negate && !reg.abs); + brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file); + } else { /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: * * "Accumulator registers may be accessed explicitly as src0 @@ -2484,6 +2520,101 @@ brw_send_indirect_message(struct brw_codegen *p, brw_inst_set_sfid(devinfo, send, sfid); } +void +brw_send_indirect_split_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg payload0, + struct brw_reg payload1, + struct brw_reg desc, + unsigned desc_imm, + struct brw_reg ex_desc, + unsigned ex_desc_imm) +{ + const struct gen_device_info *devinfo = p->devinfo; + struct brw_inst *send; + + dst = retype(dst, BRW_REGISTER_TYPE_UW); + + assert(desc.type == BRW_REGISTER_TYPE_UD); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + desc.ud |= desc_imm; + } else { + struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Load the indirect descriptor to an address register using OR so the + * caller can specify additional descriptor bits with the desc_imm + * immediate. 
+ */ + brw_OR(p, addr, desc, brw_imm_ud(desc_imm)); + + brw_pop_insn_state(p); + desc = addr; + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + ex_desc.ud |= ex_desc_imm; + } else { + struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Load the indirect extended descriptor to an address register using OR + * so the caller can specify additional descriptor bits with the + * ex_desc_imm immediate. + * + * Even though the instruction dispatcher always pulls the SFID from the + * instruction itself, the extended descriptor sent to the actual unit + * gets the SFID from the extended descriptor which comes from the + * address register. If we don't OR it in, the external unit gets + * confused and hangs the GPU. + */ + brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid)); + + brw_pop_insn_state(p); + ex_desc = addr; + } + + send = next_insn(p, BRW_OPCODE_SENDS); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD)); + brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD)); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_desc(devinfo, send, 0); + brw_inst_set_send_desc(devinfo, send, desc.ud); + } else { + assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(desc.nr == BRW_ARF_ADDRESS); + assert(desc.subnr == 0); + brw_inst_set_send_sel_reg32_desc(devinfo, send, 1); + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0); + brw_inst_set_send_ex_desc(devinfo, send, ex_desc.ud); + } else { + assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(ex_desc.nr == BRW_ARF_ADDRESS); + assert((ex_desc.subnr & 0x3) == 0); + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1); + 
brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2); + } + + brw_inst_set_sfid(devinfo, send, sfid); +} + static void brw_send_indirect_surface_message(struct brw_codegen *p, unsigned sfid, diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index 5e505992842..358a0347a93 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -102,6 +102,18 @@ inst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst) } } +static bool +inst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + switch (brw_inst_opcode(devinfo, inst)) { + case BRW_OPCODE_SENDS: + case BRW_OPCODE_SENDSC: + return true; + default: + return false; + } +} + static unsigned signed_type(unsigned type) { @@ -248,6 +260,12 @@ sources_not_null(const struct gen_device_info *devinfo, if (num_sources == 3) return (struct string){}; + /* Nothing to test. Split sends can only encode a file in sources that are + * allowed to be NULL. 
+ */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + if (num_sources >= 1) ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); @@ -263,8 +281,41 @@ send_restrictions(const struct gen_device_info *devinfo, { struct string error_msg = { .str = NULL, .len = 0 }; - if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) { + if (inst_is_split_send(devinfo, inst)) { + ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && + brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL, + "src1 of split send must be a GRF or NULL"); + + ERROR_IF(brw_inst_eot(devinfo, inst) && + brw_inst_src0_da_reg_nr(devinfo, inst) < 112, + "send with EOT must use g112-g127"); + ERROR_IF(brw_inst_eot(devinfo, inst) && + brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE && + brw_inst_send_src1_reg_nr(devinfo, inst) < 112, + "send with EOT must use g112-g127"); + + if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) { + /* Assume minimums if we don't know */ + unsigned mlen = 1; + if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) { + const uint32_t desc = brw_inst_send_desc(devinfo, inst); + mlen = brw_message_desc_mlen(devinfo, desc); + } + + unsigned ex_mlen = 1; + if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) { + const uint32_t ex_desc = brw_inst_send_ex_desc(devinfo, inst); + ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc); + } + const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst); + const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst); + ERROR_IF((src0_reg_nr <= src1_reg_nr && + src1_reg_nr < src0_reg_nr + mlen) || + (src1_reg_nr <= src0_reg_nr && + src0_reg_nr < src1_reg_nr + ex_mlen), + "split send payloads must not overlap"); + } + } else if (inst_is_send(devinfo, inst)) { ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT, "send must use direct addressing"); 
@@ -534,6 +585,12 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo, if (num_sources == 3) return (struct string){}; + /* Split sends don't have the bits in the instruction to encode regions so + * there's nothing to check. + */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) { if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1, @@ -1124,6 +1181,10 @@ special_requirements_for_handling_double_precision_data_types( if (num_sources == 3 || num_sources == 0) return (struct string){}; + /* Split sends don't have types so there's no doubles there. */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + enum brw_reg_type exec_type = execution_type(devinfo, inst); unsigned exec_type_size = brw_reg_type_to_size(exec_type); diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h index f60325c1a6b..71316f12215 100644 --- a/src/intel/compiler/brw_inst.h +++ b/src/intel/compiler/brw_inst.h @@ -455,6 +455,19 @@ FJ(gen4_jump_count, 111, 96, devinfo->gen < 6) FC(gen4_pop_count, 115, 112, devinfo->gen < 6) /** @} */ +/** + * SEND instructions: + * @{ + */ +FC(send_ex_desc_ia_subreg_nr, 82, 80, devinfo->gen >= 9) +FC(send_src0_address_mode, 79, 79, devinfo->gen >= 9) +FC(send_sel_reg32_desc, 77, 77, devinfo->gen >= 9) +FC(send_sel_reg32_ex_desc, 61, 61, devinfo->gen >= 9) +FC(send_src1_reg_nr, 51, 44, devinfo->gen >= 9) +FC(send_src1_reg_file, 36, 36, devinfo->gen >= 9) +FC(send_dst_reg_file, 35, 35, devinfo->gen >= 9) +/** @} */ + /* Message descriptor bits */ #define MD(x) ((x) + 96) @@ -513,11 +526,21 @@ brw_inst_set_send_ex_desc(const struct gen_device_info *devinfo, brw_inst *inst, uint32_t value) { assert(devinfo->gen >= 9); - brw_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28)); - brw_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24)); - 
brw_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20)); - brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16)); - assert(GET_BITS(value, 15, 0) == 0); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) { + brw_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28)); + brw_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24)); + brw_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20)); + brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16)); + assert(GET_BITS(value, 15, 0) == 0); + } else { + assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC); + brw_inst_set_bits(inst, 95, 80, GET_BITS(value, 31, 16)); + assert(GET_BITS(value, 15, 10) == 0); + brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 9, 6)); + assert(GET_BITS(value, 5, 0) == 0); + } } /** @@ -530,10 +553,18 @@ brw_inst_send_ex_desc(const struct gen_device_info *devinfo, const brw_inst *inst) { assert(devinfo->gen >= 9); - return (brw_inst_bits(inst, 94, 91) << 28 | - brw_inst_bits(inst, 88, 85) << 24 | - brw_inst_bits(inst, 83, 80) << 20 | - brw_inst_bits(inst, 67, 64) << 16); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) { + return (brw_inst_bits(inst, 94, 91) << 28 | + brw_inst_bits(inst, 88, 85) << 24 | + brw_inst_bits(inst, 83, 80) << 20 | + brw_inst_bits(inst, 67, 64) << 16); + } else { + assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC); + return (brw_inst_bits(inst, 95, 80) << 16 | + brw_inst_bits(inst, 67, 64) << 6); + } } /** @@ -956,9 +987,11 @@ brw_inst_##reg##_ia16_addr_imm(const struct gen_device_info *devinfo, \ * Compared to Align1, these are missing the low 4 bits. 
* -Gen 4- ----Gen8---- */ -BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100) -BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68) -BRW_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52) +BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100) +BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68) +BRW_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52) +BRW_IA16_ADDR_IMM(send_src0, -1, -1, 78, 72, 68) +BRW_IA16_ADDR_IMM(send_dst, -1, -1, 62, 56, 52) /** * Fetch a set of contiguous bits from the instruction. -- cgit v1.2.3