diff options
author | Ruiling Song <ruiling.song@intel.com> | 2015-10-29 15:19:16 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2016-11-08 19:06:55 +0800 |
commit | 7af24651a0c3c2816b507e22543e31ab7fb6859e (patch) | |
tree | f7ffe8e8a8a7cbf98489831d9c06528390b43713 | |
parent | a7a2f1bfef4181130ea766ee857c555f691776e2 (diff) |
GBE: add byte scatter a64 message
Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/backend/gen8_context.cpp | 14 | ||||
-rw-r--r-- | backend/src/backend/gen8_context.hpp | 2 | ||||
-rw-r--r-- | backend/src/backend/gen8_encoder.cpp | 67 | ||||
-rw-r--r-- | backend/src/backend/gen8_encoder.hpp | 2 | ||||
-rw-r--r-- | backend/src/backend/gen8_instruction.hpp | 13 | ||||
-rw-r--r-- | backend/src/backend/gen_context.cpp | 8 | ||||
-rw-r--r-- | backend/src/backend/gen_context.hpp | 2 | ||||
-rw-r--r-- | backend/src/backend/gen_encoder.cpp | 7 | ||||
-rw-r--r-- | backend/src/backend/gen_encoder.hpp | 4 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_gen7_schedule_info.hxx | 2 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 42 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.hxx | 2 |
12 files changed, 165 insertions, 0 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 538d5452..66ce028f 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -912,6 +912,20 @@ namespace gbe const uint32_t elemNum = insn.extra.elem; p->UNTYPED_WRITEA64(src, elemNum); } + + void Gen8Context::emitByteGatherA64Instruction(const SelectionInstruction &insn) { + const GenRegister dst = ra->genReg(insn.dst(0)); + const GenRegister src = ra->genReg(insn.src(0)); + const uint32_t elemSize = insn.extra.elem; + p->BYTE_GATHERA64(dst, src, elemSize); + } + + void Gen8Context::emitByteScatterA64Instruction(const SelectionInstruction &insn) { + const GenRegister src = ra->genReg(insn.src(0)); + const uint32_t elemSize = insn.extra.elem; + p->BYTE_SCATTERA64(src, elemSize); + } + void Gen8Context::emitRead64Instruction(const SelectionInstruction &insn) { const uint32_t elemNum = insn.extra.elem; diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp index 529dd177..f5e5522c 100644 --- a/backend/src/backend/gen8_context.hpp +++ b/backend/src/backend/gen8_context.hpp @@ -68,6 +68,8 @@ namespace gbe virtual void emitUntypedWriteA64Instruction(const SelectionInstruction &insn); virtual void emitUntypedReadA64Instruction(const SelectionInstruction &insn); + virtual void emitByteGatherA64Instruction(const SelectionInstruction &insn); + virtual void emitByteScatterA64Instruction(const SelectionInstruction &insn); virtual void emitWrite64Instruction(const SelectionInstruction &insn); virtual void emitRead64Instruction(const SelectionInstruction &insn); virtual void emitI64MULInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp index 92a2d339..8861c3f3 100644 --- a/backend/src/backend/gen8_encoder.cpp +++ b/backend/src/backend/gen8_encoder.cpp @@ -84,6 +84,25 @@ namespace gbe NOT_SUPPORTED; } + static void setDPByteScatterGather(GenEncoder *p, + GenNativeInstruction *insn, + uint32_t bti, + uint32_t block_size, + uint32_t data_size, + uint32_t msg_type, + uint32_t msg_length, + uint32_t response_length) + { + const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA; + Gen8NativeInstruction *gen8_insn = &insn->gen8_insn; + p->setMessageDescriptor(insn, sfid, msg_length, response_length); + gen8_insn->bits3.gen8_scatter_rw_a64.msg_type = msg_type; + gen8_insn->bits3.gen8_scatter_rw_a64.bti = bti; + gen8_insn->bits3.gen8_scatter_rw_a64.data_sz = data_size; + gen8_insn->bits3.gen8_scatter_rw_a64.block_sz = block_size; + GBE_ASSERT(p->curr.execWidth == 8); + } + void Gen8Encoder::setTypedWriteMessage(GenNativeInstruction *insn, unsigned char bti, unsigned char msg_type, uint32_t msg_length, bool header_present) { @@ -276,6 +295,54 @@ namespace gbe response_length); } + void Gen8Encoder::BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t elemSize) { + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); + this->setHeader(insn); + insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA; + + this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); + this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0)); + + this->setSrc1(insn, GenRegister::immud(0)); + //setByteGatherMessageDesc(insn, bti.value.ud, elemSize); + GBE_ASSERT(this->curr.execWidth == 8); + const uint32_t msg_length = 2; + const uint32_t response_length = 1; + setDPByteScatterGather(this, + insn, + 0xff, + 0x0, + elemSize, + GEN8_P1_BYTE_GATHER_A64, + msg_length, + response_length); + } + + void Gen8Encoder::BYTE_SCATTERA64(GenRegister msg, uint32_t elemSize) { + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); + + this->setHeader(insn); + insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA; + + // only support simd8 + GBE_ASSERT(this->curr.execWidth == 8); + this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); + + this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0)); + + this->setSrc1(insn, GenRegister::immud(0)); + const uint32_t msg_length = 3; + const uint32_t response_length = 0; + setDPByteScatterGather(this, + insn, + 0xff, + 0x0, + elemSize, + GEN8_P1_BYTE_SCATTER_A64, + msg_length, + response_length); + } + void Gen8Encoder::LOAD_INT64_IMM(GenRegister dest, GenRegister value) { MOV(dest, value); } diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp index 9c0102dc..321969a6 100644 --- a/backend/src/backend/gen8_encoder.hpp +++ b/backend/src/backend/gen8_encoder.hpp @@ -48,6 +48,8 @@ namespace gbe virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum); virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum); virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum); + virtual void BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t elemSize); + virtual void BYTE_SCATTERA64(GenRegister src, uint32_t elemSize); virtual void setHeader(GenNativeInstruction *insn); virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, uint32_t rgba, uint32_t msg_type, uint32_t msg_length, uint32_t response_length); diff --git a/backend/src/backend/gen8_instruction.hpp b/backend/src/backend/gen8_instruction.hpp index 01769afb..9c2c9fe6 100644 --- a/backend/src/backend/gen8_instruction.hpp +++ b/backend/src/backend/gen8_instruction.hpp @@ -580,6 +580,19 @@ union Gen8NativeInstruction } gen8_untyped_rw_a64; struct { + uint32_t bti:8; + uint32_t block_sz:2; // 00 byte 01 dword + uint32_t data_sz:2; // 0 ->1block 1->2block + uint32_t ignored:2; + uint32_t msg_type:5; // 10000 scatter read, 11010 scatter write 11001 a64 untyped write + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad2:2; + uint32_t end_of_thread:1; + } gen8_scatter_rw_a64; + + struct { uint32_t src1_subreg_nr_high:1; uint32_t src1_reg_nr:8; uint32_t src1_subreg_nr_w:1; diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 5191be29..4e41cf19 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2222,6 +2222,14 @@ namespace gbe assert(0); } + void GenContext::emitByteGatherA64Instruction(const SelectionInstruction &insn) { + assert(0); + } + + void GenContext::emitByteScatterA64Instruction(const SelectionInstruction &insn) { + assert(0); + } + void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn) { const GenRegister src = ra->genReg(insn.src(0)); for(uint32_t i = 0; i < insn.dstNum; i++) { diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 5189b27c..2ce559c2 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -163,6 +163,8 @@ namespace gbe void emitUntypedWriteInstruction(const SelectionInstruction &insn); virtual void emitUntypedReadA64Instruction(const SelectionInstruction &insn); virtual void emitUntypedWriteA64Instruction(const SelectionInstruction &insn); + virtual void emitByteGatherA64Instruction(const SelectionInstruction &insn); + virtual void emitByteScatterA64Instruction(const SelectionInstruction &insn); void emitAtomicInstruction(const SelectionInstruction &insn); void emitByteGatherInstruction(const SelectionInstruction &insn); void emitByteScatterInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index fdad0359..f4a611ba 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -532,6 +532,13 @@ namespace gbe this->setSrc1(insn, bti); } } + void GenEncoder::BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t elemSize) { + assert(0); + } + + void GenEncoder::BYTE_SCATTERA64(GenRegister src, uint32_t elemSize){ + assert(0); + } void GenEncoder::DWORD_GATHER(GenRegister dst, GenRegister src, uint32_t bti) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index 6ea18609..0de17ec3 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -183,6 +183,10 @@ namespace gbe void BYTE_GATHER(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemSize); /*! Byte scatter (for unaligned bytes, shorts and ints) */ void BYTE_SCATTER(GenRegister src, GenRegister bti, uint32_t elemSize); + /*! Byte gather a64 (for unaligned bytes, shorts and ints) */ + virtual void BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t elemSize); + /*! Byte scatter a64 (for unaligned bytes, shorts and ints) */ + virtual void BYTE_SCATTERA64(GenRegister src, uint32_t elemSize); /*! DWord gather (for constant cache read) */ void DWORD_GATHER(GenRegister dst, GenRegister src, uint32_t bti); /*! for scratch memory read */ diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx index e2465e82..55d8a3cb 100644 --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx @@ -30,6 +30,8 @@ DECL_GEN7_SCHEDULE(UntypedRead, 160, 1, 1) DECL_GEN7_SCHEDULE(UntypedWrite, 160, 1, 1) DECL_GEN7_SCHEDULE(UntypedReadA64, 160, 1, 1) DECL_GEN7_SCHEDULE(UntypedWriteA64, 160, 1, 1) +DECL_GEN7_SCHEDULE(ByteGatherA64, 160, 1, 1) +DECL_GEN7_SCHEDULE(ByteScatterA64, 160, 1, 1) DECL_GEN7_SCHEDULE(ByteGather, 160, 1, 1) DECL_GEN7_SCHEDULE(ByteScatter, 160, 1, 1) DECL_GEN7_SCHEDULE(DWordGather, 160, 1, 1) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 622bff5a..85fae5c7 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -187,6 +187,7 @@ namespace gbe this->opcode == SEL_OP_READ64 || this->opcode == SEL_OP_ATOMIC || this->opcode == SEL_OP_BYTE_GATHER || + this->opcode == SEL_OP_BYTE_GATHERA64 || this->opcode == SEL_OP_SAMPLE || this->opcode == SEL_OP_VME || this->opcode == SEL_OP_DWORD_GATHER || @@ -214,6 +215,7 @@ namespace gbe this->opcode == SEL_OP_WRITE64 || this->opcode == SEL_OP_ATOMIC || this->opcode == SEL_OP_BYTE_SCATTER || + this->opcode == SEL_OP_BYTE_SCATTERA64 || this->opcode == SEL_OP_TYPED_WRITE || this->opcode == SEL_OP_OBWRITE || this->opcode == SEL_OP_MBWRITE; @@ -646,6 +648,10 @@ namespace gbe void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, GenRegister bti, vector<GenRegister> temps); /*! Byte scatter (for unaligned bytes, shorts and ints) */ void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, GenRegister bti, vector <GenRegister> temps); + /*! Byte gather a64 (for unaligned bytes, shorts and ints) */ + void BYTE_GATHERA64(Reg dst, Reg addr, uint32_t elemSize); + /*! Byte scatter (for unaligned bytes, shorts and ints) */ + void BYTE_SCATTERA64(GenRegister *msg, unsigned msgNum, uint32_t elemSize); /*! Untyped read (up to 4 elements) */ void UNTYPED_READA64(Reg addr, const GenRegister *dst, uint32_t dstNum, uint32_t elemNum); /*! Untyped write (up to 4 elements) */ @@ -1668,6 +1674,42 @@ namespace gbe vector->reg = &insn->src(0); } + void Selection::Opaque::BYTE_GATHERA64(Reg dst, Reg addr, uint32_t elemSize) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_BYTE_GATHERA64, 1, 1); + SelectionVector *srcVector = this->appendVector(); + SelectionVector *dstVector = this->appendVector(); + + if (this->isScalarReg(dst.reg())) + insn->state.noMask = 1; + + insn->src(0) = addr; + insn->dst(0) = dst; + insn->extra.elem = elemSize; + + dstVector->regNum = 1; + dstVector->isSrc = 0; + dstVector->offsetID = 0; + dstVector->reg = &insn->dst(0); + srcVector->regNum = 1; + srcVector->isSrc = 1; + srcVector->offsetID = 0; + srcVector->reg = &insn->src(0); + } + + void Selection::Opaque::BYTE_SCATTERA64(GenRegister *msg, uint32_t msgNum, uint32_t elemSize) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_BYTE_SCATTERA64, 0, msgNum); + SelectionVector *vector = this->appendVector(); + for (unsigned i = 0; i < msgNum; i++) + insn->src(i) = msg[i]; + + insn->extra.elem = elemSize; + + vector->regNum = msgNum; + vector->isSrc = 1; + vector->offsetID = 0; + vector->reg = &insn->src(0); + } + void Selection::Opaque::DWORD_GATHER(Reg dst, Reg addr, uint32_t bti) { SelectionInstruction *insn = this->appendInsn(SEL_OP_DWORD_GATHER, 1, 1); SelectionVector *vector = this->appendVector(); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 854b6daa..a48fa3e6 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -60,6 +60,8 @@ DECL_SELECTION_IR(READ64, Read64Instruction) DECL_SELECTION_IR(WRITE64, Write64Instruction) DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction) DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction) +DECL_SELECTION_IR(BYTE_GATHERA64, ByteGatherA64Instruction) +DECL_SELECTION_IR(BYTE_SCATTERA64, ByteScatterA64Instruction) DECL_SELECTION_IR(DWORD_GATHER, DWordGatherInstruction) DECL_SELECTION_IR(PACK_BYTE, PackByteInstruction) DECL_SELECTION_IR(UNPACK_BYTE, UnpackByteInstruction) |