From 3433471e8b46dd9dd042a00f88ef9ad011a94aac Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 1 Sep 2012 17:26:24 +0200 Subject: nvc0/ir: add initial code to support GK110 ISA encoding --- src/gallium/drivers/nv50/codegen/nv50_ir_driver.h | 5 + src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h | 25 + src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp | 1 + .../drivers/nv50/codegen/nv50_ir_target.cpp | 1 + src/gallium/drivers/nvc0/Makefile.sources | 1 + .../drivers/nvc0/codegen/nv50_ir_emit_gk110.cpp | 1628 ++++++++++++++++++++ .../drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp | 23 +- .../drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 10 +- .../drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp | 4 +- .../drivers/nvc0/codegen/nv50_ir_target_nvc0.h | 6 +- 10 files changed, 1691 insertions(+), 13 deletions(-) create mode 100644 src/gallium/drivers/nvc0/codegen/nv50_ir_emit_gk110.cpp diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h index 9632986fe40..446befa5fb7 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h @@ -93,6 +93,11 @@ struct nv50_ir_prog_symbol uint32_t offset; }; +#define NVISA_GF100_CHIPSET_C0 0xc0 +#define NVISA_GF100_CHIPSET_D0 0xd0 +#define NVISA_GK104_CHIPSET 0xe0 +#define NVISA_GK110_CHIPSET 0xf0 + struct nv50_ir_prog_info { uint16_t target; /* chipset (0x50, 0x84, 0xc0, ...) */ diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h index ab4c98fbcd7..55a3332c727 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h @@ -73,6 +73,31 @@ static inline unsigned int typeSizeof(DataType ty) } } +static inline unsigned int typeSizeofLog2(DataType ty) +{ + switch (ty) { + case TYPE_F16: + case TYPE_U16: + case TYPE_S16: + return 1; + case TYPE_F32: + case TYPE_U32: + case TYPE_S32: + return 2; + case TYPE_F64: + case TYPE_U64: + case TYPE_S64: + return 3; + case TYPE_B96: + case TYPE_B128: + return 4; + case TYPE_U8: + case TYPE_S8: + default: + return 0; + } +} + static inline DataType typeOfSize(unsigned int size, bool flt = false, bool sgn = false) { diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp index 726331e91e7..13998767710 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp @@ -1907,6 +1907,7 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) texConstraintNVC0(tex); break; case 0xe0: + case NVISA_GK110_CHIPSET: texConstraintNVE0(tex); break; default: diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp index f718912fb39..707c9e8d219 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp @@ -121,6 +121,7 @@ Target *Target::create(unsigned int chipset) case 0xc0: case 0xd0: case 0xe0: + case NVISA_GK110_CHIPSET: return getTargetNVC0(chipset); case 0x50: case 0x80: diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources index d74ecf385f5..82504bf9849 100644 --- a/src/gallium/drivers/nvc0/Makefile.sources +++ b/src/gallium/drivers/nvc0/Makefile.sources @@ -16,6 +16,7 @@ C_SOURCES := \ nvc0_query.c CPP_SOURCES := \ + codegen/nv50_ir_emit_gk110.cpp \ codegen/nv50_ir_emit_nvc0.cpp \ codegen/nv50_ir_lowering_nvc0.cpp \ codegen/nv50_ir_target_nvc0.cpp diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_gk110.cpp new file mode 100644 index 00000000000..6c229fddf70 --- /dev/null +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_gk110.cpp @@ -0,0 +1,1628 @@ +/* + * Copyright 2012 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nv50_ir_target_nvc0.h" + +// CodeEmitter for GK110 encoding of the Fermi/Kepler ISA. + +namespace nv50_ir { + +class CodeEmitterGK110 : public CodeEmitter +{ +public: + CodeEmitterGK110(const TargetNVC0 *); + + virtual bool emitInstruction(Instruction *); + virtual uint32_t getMinEncodingSize(const Instruction *) const; + virtual void prepareEmission(Function *); + + inline void setProgramType(Program::Type pType) { progType = pType; } + +private: + const TargetNVC0 *targ; + + Program::Type progType; + + const bool writeIssueDelays; + +private: + void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1); + void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg); + void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier); + + void emitPredicate(const Instruction *); + + void setCAddress14(const ValueRef&); + void setShortImmediate(const Instruction *, const int s); + void setImmediate32(const Instruction *, const int s, Modifier); + + void modNegAbsF32_3b(const Instruction *, const int s); + + void emitCondCode(CondCode cc, int pos, uint8_t mask); + void emitInterpMode(const Instruction *); + void emitLoadStoreType(DataType ty, const int pos); + void emitCachingMode(CacheMode c, const int pos); + + inline uint8_t getSRegEncoding(const ValueRef&); + + void emitRoundMode(RoundMode, const int pos, const int rintPos); + void emitRoundModeF(RoundMode, const int pos); + void emitRoundModeI(RoundMode, const int pos); + + void emitNegAbs12(const Instruction *); + + void emitNOP(const Instruction *); + + void emitLOAD(const Instruction *); + void emitSTORE(const Instruction *); + void emitMOV(const Instruction *); + + void emitINTERP(const Instruction *); + void emitPFETCH(const Instruction *); + void emitVFETCH(const Instruction *); + void emitEXPORT(const Instruction *); + void emitOUT(const Instruction *); + + void emitUADD(const Instruction *); + void emitFADD(const Instruction *); + void emitIMUL(const Instruction *); + void emitFMUL(const Instruction *); + void emitIMAD(const Instruction *); + void emitISAD(const Instruction *); + void emitFMAD(const Instruction *); + + void emitNOT(const Instruction *); + void emitLogicOp(const Instruction *, uint8_t subOp); + void emitPOPC(const Instruction *); + void emitINSBF(const Instruction *); + void emitShift(const Instruction *); + + void emitSFnOp(const Instruction *, uint8_t subOp); + + void emitCVT(const Instruction *); + void emitMINMAX(const Instruction *); + void emitPreOp(const Instruction *); + + void emitSET(const CmpInstruction *); + void emitSLCT(const CmpInstruction *); + void emitSELP(const Instruction *); + + void emitTEXBAR(const Instruction *); + void emitTEX(const TexInstruction *); + void emitTEXCSAA(const TexInstruction *); + void emitTXQ(const TexInstruction *); + void emitPIXLD(const TexInstruction *); + + void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask); + + void emitFlow(const Instruction *); + + inline void defId(const ValueDef&, const int pos); + inline void srcId(const ValueRef&, const int pos); + inline void srcId(const ValueRef *, const int pos); + inline void srcId(const Instruction *, int s, const int pos); + + inline void srcAddr32(const ValueRef&, const int pos); // address / 4 + + inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false); +}; + +#define GK110_GPR_ZERO 255 + +#define NEG_(b, s) \ + if (i->src(s).mod.neg()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) +#define ABS_(b, s) \ + if (i->src(s).mod.abs()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) + +#define NOT_(b, s) if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) \ + code[(0x##b) / 32] |= 1 << ((0x##b) % 32) + +#define FTZ_(b) if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) + +#define SAT_(b) if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) + +#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b) + +#define SDATA(a) ((a).rep()->reg.data) +#define DDATA(a) ((a).rep()->reg.data) + +void CodeEmitterGK110::srcId(const ValueRef& src, const int pos) +{ + code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32); +} + +void CodeEmitterGK110::srcId(const ValueRef *src, const int pos) +{ + code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32); +} + +void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos) +{ + int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO; + code[pos / 32] |= r << (pos % 32); +} + +void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos) +{ + code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32); +} + +void CodeEmitterGK110::defId(const ValueDef& def, const int pos) +{ + code[pos / 32] |= (def.get() ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32); +} + +bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod) +{ + const ImmediateValue *imm = ref.get()->asImm(); + + return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000)); +} + +void +CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos) +{ + bool rint = false; + uint8_t n; + + switch (rnd) { + case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break; + case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break; + case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break; + default: + rint = rnd == ROUND_NI; + n = 0; + assert(rnd == ROUND_N || rnd == ROUND_NI); + break; + } + code[pos / 32] |= n << (pos % 32); + if (rint && rintPos >= 0) + code[rintPos / 32] |= 1 << (rintPos % 32); +} + +void +CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos) +{ + uint8_t n; + + switch (rnd) { + case ROUND_M: n = 1; break; + case ROUND_P: n = 2; break; + case ROUND_Z: n = 3; break; + default: + n = 0; + assert(rnd == ROUND_N); + break; + } + code[pos / 32] |= n << (pos % 32); +} + +void +CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos) +{ + uint8_t n; + + switch (rnd) { + case ROUND_MI: n = 1; break; + case ROUND_PI: n = 2; break; + case ROUND_ZI: n = 3; break; + default: + n = 0; + assert(rnd == ROUND_NI); + break; + } + code[pos / 32] |= n << (pos % 32); +} + +void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask) +{ + uint8_t n; + + switch (cc) { + case CC_FL: n = 0x00; break; + case CC_LT: n = 0x01; break; + case CC_EQ: n = 0x02; break; + case CC_LE: n = 0x03; break; + case CC_GT: n = 0x04; break; + case CC_NE: n = 0x05; break; + case CC_GE: n = 0x06; break; + case CC_LTU: n = 0x09; break; + case CC_EQU: n = 0x0a; break; + case CC_LEU: n = 0x0b; break; + case CC_GTU: n = 0x0c; break; + case CC_NEU: n = 0x0d; break; + case CC_GEU: n = 0x0e; break; + case CC_TR: n = 0x0f; break; + case CC_NO: n = 0x10; break; + case CC_NC: n = 0x11; break; + case CC_NS: n = 0x12; break; + case CC_NA: n = 0x13; break; + case CC_A: n = 0x14; break; + case CC_S: n = 0x15; break; + case CC_C: n = 0x16; break; + case CC_O: n = 0x17; break; + default: + n = 0; + assert(!"invalid condition code"); + break; + } + code[pos / 32] |= (n & mask) << (pos % 32); +} + +void +CodeEmitterGK110::emitPredicate(const Instruction *i) +{ + if (i->predSrc >= 0) { + srcId(i->src(i->predSrc), 18); + if (i->cc == CC_NOT_P) + code[0] |= 8 << 18; // negate + assert(i->getPredicate()->reg.file == FILE_PREDICATE); + } else { + code[0] |= 7 << 18; + } +} + +void +CodeEmitterGK110::setCAddress14(const ValueRef& src) +{ + const int32_t addr = src.get()->asSym()->reg.data.offset / 4; + + code[0] |= (addr & 0x01ff) << 23; + code[1] |= (addr & 0x3e00) >> 9; +} + +void +CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s) +{ + const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32; + const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64; + + if (i->sType == TYPE_F32) { + assert(!(u32 & 0x00000fff)); + code[0] |= ((u32 & 0x001ff000) >> 12) << 23; + code[1] |= ((u32 & 0x7fe00000) >> 21); + code[1] |= ((u32 & 0x80000000) >> 4); + } else + if (i->sType == TYPE_F64) { + assert(!(u64 & 0x00000fffffffffffULL)); + code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23; + code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53); + code[1] |= ((u64 & 0x8000000000000000ULL) >> 36); + } else { + assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000); + code[0] |= (u32 & 0x001ff) << 23; + code[1] |= (u32 & 0x7fe00) >> 9; + code[1] |= (u32 & 0x80000) << 8; + } +} + +void +CodeEmitterGK110::setImmediate32(const Instruction *i, const int s, + Modifier mod) +{ + uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32; + + if (mod) { + ImmediateValue imm(i->getSrc(s)->asImm(), i->sType); + mod.applyTo(imm); + u32 = imm.reg.data.u32; + } + + code[0] |= u32 << 23; + code[1] |= u32 >> 9; +} + +void +CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg, + Modifier mod) +{ + code[0] = ctg; + code[1] = opc << 20; + + emitPredicate(i); + + defId(i->def(0), 2); + + for (int s = 0; s < 3 && i->srcExists(s); ++s) { + switch (i->src(s).getFile()) { + case FILE_GPR: + srcId(i->src(s), s ? 42 : 10); + break; + case FILE_IMMEDIATE: + setImmediate32(i, s, mod); + break; + default: + break; + } + } +} + + +void +CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg) +{ + code[0] = ctg; + code[1] = opc << 20; + + emitPredicate(i); + + defId(i->def(0), 2); + + switch (i->src(0).getFile()) { + case FILE_MEMORY_CONST: + code[1] |= 0x4 << 28; + setCAddress14(i->src(0)); + break; + case FILE_GPR: + code[1] |= 0xc << 28; + srcId(i->src(0), 23); + break; + default: + assert(0); + break; + } +} + +// 0x2 for GPR, c[] and 0x1 for short immediate +void +CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2, + uint32_t opc1) +{ + const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE; + + int s1 = 23; + if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST) + s1 = 42; + + if (imm) { + code[0] = 0x1; + code[1] = opc1 << 20; + } else { + code[0] = 0x2; + code[1] = (0xc << 28) | (opc2 << 20); + } + + emitPredicate(i); + + defId(i->def(0), 2); + + for (int s = 0; s < 3 && i->srcExists(s); ++s) { + switch (i->src(s).getFile()) { + case FILE_MEMORY_CONST: + code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28); + setCAddress14(i->src(s)); + code[1] |= i->getSrc(s)->reg.fileIndex << 5; + break; + case FILE_IMMEDIATE: + setShortImmediate(i, s); + break; + case FILE_GPR: + srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10); + break; + default: + // ignore here, can be predicate or flags, but must not be address + break; + } + } + // 0x0 = invalid + // 0xc = rrr + // 0x8 = rrc + // 0x4 = rcr + assert(imm || (code[1] & (0xc << 28))); +} + +inline void +CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s) +{ + if (i->src(s).mod.abs()) code[1] &= ~(1 << 27); + if (i->src(s).mod.neg()) code[1] ^= (1 << 27); +} + +void +CodeEmitterGK110::emitNOP(const Instruction *i) +{ + code[0] = 0x00003c02; + code[1] = 0x85800000; + + if (i) + emitPredicate(i); + else + code[0] = 0x001c3c02; +} + +void +CodeEmitterGK110::emitFMAD(const Instruction *i) +{ + assert(!isLIMM(i->src(1), TYPE_F32)); + + emitForm_21(i, 0x0c0, 0x940); + + NEG_(34, 2); + SAT_(35); + RND_(36, F); + FTZ_(38); + + bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); + + if (code[0] & 0x1) { + if (neg1) + code[1] ^= 1 << 27; + } else + if (neg1) { + code[1] |= 1 << 19; + } +} + +void +CodeEmitterGK110::emitFMUL(const Instruction *i) +{ + bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); + + assert(i->postFactor >= -3 && i->postFactor <= 3); + + if (isLIMM(i->src(1), TYPE_F32)) { + emitForm_L(i, 0x200, 0x2, Modifier(0)); + + FTZ_(38); + SAT_(3a); + if (neg) + code[1] ^= 1 << 22; + + assert(i->postFactor == 0); + } else { + emitForm_21(i, 0x234, 0xc34); + + RND_(2a, F); + FTZ_(2f); + SAT_(35); + + if (code[0] & 0x1) { + if (neg) + code[1] ^= 1 << 27; + } else + if (neg) { + code[1] |= 1 << 19; + } + } +} + +void +CodeEmitterGK110::emitIMUL(const Instruction *i) +{ + assert(!i->src(0).mod.neg() && !i->src(1).mod.neg()); + assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); + + if (isLIMM(i->src(1), TYPE_S32)) { + emitForm_L(i, 0x280, 2, Modifier(0)); + + assert(i->subOp != NV50_IR_SUBOP_MUL_HIGH); + + if (i->sType == TYPE_S32) + code[1] |= 3 << 25; + } else { + emitForm_21(i, 0x21c, 0xc1c); + + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) + code[1] |= 1 << 10; + if (i->sType == TYPE_S32) + code[1] |= 3 << 11; + } +} + +void +CodeEmitterGK110::emitFADD(const Instruction *i) +{ + if (isLIMM(i->src(1), TYPE_F32)) { + assert(i->rnd == ROUND_N); + assert(!i->saturate); + + emitForm_L(i, 0x400, 0, i->src(1).mod); + + FTZ_(3a); + NEG_(3b, 0); + ABS_(39, 0); + } else { + emitForm_21(i, 0x22c, 0xc2c); + + FTZ_(2f); + RND_(2a, F); + ABS_(31, 0); + NEG_(33, 0); + + if (code[0] & 0x1) { + modNegAbsF32_3b(i, 1); + } else { + ABS_(34, 1); + NEG_(30, 1); + } + } +} + +void +CodeEmitterGK110::emitUADD(const Instruction *i) +{ + uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg(); + + if (i->op == OP_SUB) + addOp ^= 1; + + assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); + + if (isLIMM(i->src(1), TYPE_S32)) { + emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0)); + + if (addOp & 2) + code[1] |= 1 << 27; + + assert(!i->defExists(1)); + assert(i->flagsSrc < 0); + + SAT_(39); + } else { + emitForm_21(i, 0x208, 0xc08); + + assert(addOp != 3); // would be add-plus-one + + code[1] |= addOp << 19; + + if (i->defExists(1)) + code[1] |= 1 << 18; // write carry + if (i->flagsSrc >= 0) + code[1] |= 1 << 14; // add carry + + SAT_(35); + } +} + +// TODO: shl-add +void +CodeEmitterGK110::emitIMAD(const Instruction *i) +{ + uint8_t addOp = + (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg()); + + emitForm_21(i, 0x100, 0xa00); + + assert(addOp != 3); + code[1] |= addOp << 26; + + if (i->sType == TYPE_S32) + code[1] |= (1 << 19) | (1 << 24); + + if (code[0] & 0x1) { + assert(!i->subOp); + SAT_(39); + } else { + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) + code[1] |= 1 << 25; + SAT_(35); + } +} + +void +CodeEmitterGK110::emitISAD(const Instruction *i) +{ + assert(i->dType == TYPE_S32 || i->dType == TYPE_U32); + + emitForm_21(i, 0x1fc, 0xb74); + + if (i->dType == TYPE_S32) + code[1] |= 1 << 19; +} + +void +CodeEmitterGK110::emitNOT(const Instruction *i) +{ + code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src + code[1] = 0x22003800; + + emitPredicate(i); + + defId(i->def(0), 2); + + switch (i->src(0).getFile()) { + case FILE_GPR: + code[1] |= 0xc << 28; + srcId(i->src(0), 23); + break; + case FILE_MEMORY_CONST: + code[1] |= 0x4 << 28; + setCAddress14(i->src(1)); + break; + default: + assert(0); + break; + } +} + +void +CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp) +{ + assert(!(i->src(0).mod & Modifier(NV50_IR_MOD_NOT))); // XXX: find me + + if (isLIMM(i->src(1), TYPE_S32)) { + emitForm_L(i, 0x200, 0, i->src(1).mod); + code[1] |= subOp << 24; + } else { + emitForm_21(i, 0x220, 0xc20); + code[1] |= subOp << 12; + NOT_(2b, 1); + } + assert(!(code[0] & 0x1) || !(i->src(1).mod & Modifier(NV50_IR_MOD_NOT))); +} + +void +CodeEmitterGK110::emitPOPC(const Instruction *i) +{ + assert(!isLIMM(i->src(1), TYPE_S32, true)); + + emitForm_21(i, 0x204, 0xc04); + + NOT_(2a, 0); + if (!(code[0] & 0x1)) + NOT_(2b, 1); +} + +void +CodeEmitterGK110::emitINSBF(const Instruction *i) +{ + emitForm_21(i, 0x1f8, 0xb78); +} + +void +CodeEmitterGK110::emitShift(const Instruction *i) +{ + const bool sar = i->op == OP_SHR && isSignedType(i->sType); + + if (sar) { + emitForm_21(i, 0x214, 0x014); + code[1] |= 1 << 19; + } else + if (i->op == OP_SHR) { + // this is actually RSHF + emitForm_21(i, 0x27c, 0x87c); + code[1] |= GK110_GPR_ZERO << 10; + } else { + // this is actually LSHF + emitForm_21(i, 0x1fc, 0xb7c); + code[1] |= GK110_GPR_ZERO << 10; + } + + if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP) { + if (!sar) + code[1] |= 1 << 21; + // XXX: find wrap modifier for SHR S32 + } +} + +void +CodeEmitterGK110::emitPreOp(const Instruction *i) +{ + emitForm_21(i, 0x248, -1); + + if (i->op == OP_PREEX2) + code[1] |= 1 << 10; + + NEG_(30, 0); + ABS_(34, 0); +} + +void +CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp) +{ + code[0] = 0x00000002 | (subOp << 23); + code[1] = 0x84000000; + + emitPredicate(i); + + defId(i->def(0), 2); + srcId(i->src(0), 10); + + NEG_(33, 0); + ABS_(31, 0); + + // XXX: find saturate +} + +void +CodeEmitterGK110::emitMINMAX(const Instruction *i) +{ + uint32_t op2, op1; + + switch (i->dType) { + case TYPE_U32: + case TYPE_S32: + op2 = 0x210; + op1 = 0xc10; + break; + case TYPE_F32: + op2 = 0x230; + op1 = 0xc30; + break; + case TYPE_F64: + op2 = 0x228; + op1 = 0xc28; + break; + default: + assert(0); + op2 = 0; + op1 = 0; + break; + } + emitForm_21(i, op2, op1); + + if (i->dType == TYPE_S32) + code[1] |= 1 << 19; + code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt + + FTZ_(2f); + ABS_(31, 0); + NEG_(33, 0); + if (code[0] & 0x1) { + modNegAbsF32_3b(i, 1); + } else { + ABS_(34, 1); + NEG_(30, 1); + } +} + +void +CodeEmitterGK110::emitCVT(const Instruction *i) +{ + const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); + const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType); + const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType); + + bool sat = i->saturate; + bool abs = i->src(0).mod.abs(); + bool neg = i->src(0).mod.neg(); + + RoundMode rnd = i->rnd; + + switch (i->op) { + case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break; + case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break; + case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break; + case OP_SAT: sat = true; break; + case OP_NEG: neg = !neg; break; + case OP_ABS: abs = true; neg = false; break; + default: + break; + } + + uint32_t op; + + if (f2f) op = 0x254; + else if (f2i) op = 0x258; + else if (i2f) op = 0x25c; + else op = 0x260; + + emitForm_C(i, op, 0x2); + + FTZ_(2f); + if (neg) code[1] |= 1 << 16; + if (abs) code[1] |= 1 << 20; + if (sat) code[1] |= 1 << 21; + + emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1); + + code[0] |= typeSizeofLog2(i->dType) << 10; + code[0] |= typeSizeofLog2(i->sType) << 12; + + if (isSignedIntType(i->dType)) + code[0] |= 0x4000; + if (isSignedIntType(i->sType)) + code[0] |= 0x8000; +} + +void +CodeEmitterGK110::emitSET(const CmpInstruction *i) +{ + uint16_t op1, op2; + + if (i->def(0).getFile() == FILE_PREDICATE) { + switch (i->sType) { + case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break; + case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break; + default: + op2 = 0x1b0; + op1 = 0xb30; + break; + } + emitForm_21(i, op2, op1); + + NEG_(2e, 0); + ABS_(9, 0); + if (!(code[0] & 0x1)) { + NEG_(8, 1); + ABS_(2f, 1); + } else { + modNegAbsF32_3b(i, 1); + } + FTZ_(32); + + // normal DST field is negated predicate result + code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0); + if (i->defExists(1)) + defId(i->def(1), 2); + else + code[0] |= 0x1c; + } else { + switch (i->sType) { + case TYPE_F32: op2 = 0x000; op1 = 0x820; break; + case TYPE_F64: op2 = 0x080; op1 = 0x900; break; + default: + op2 = 0x1a8; + op1 = 0xb28; + break; + } + emitForm_21(i, op2, op1); + + NEG_(2e, 0); + ABS_(39, 0); + if (!(code[0] & 0x1)) { + NEG_(38, 1); + ABS_(2f, 1); + } else { + modNegAbsF32_3b(i, 1); + } + FTZ_(3a); + } + if (i->sType == TYPE_S32) + code[1] |= 1 << 19; + + if (i->op != OP_SET) { + switch (i->op) { + case OP_SET_AND: code[1] |= 0x0 << 16; break; + case OP_SET_OR: code[1] |= 0x1 << 16; break; + case OP_SET_XOR: code[1] |= 0x2 << 16; break; + default: + assert(0); + break; + } + srcId(i->src(2), 0x2a); + } else { + code[1] |= 0x7 << 10; + } + emitCondCode(i->setCond, + isFloatType(i->sType) ? 0x33 : 0x34, + isFloatType(i->sType) ? 0xf : 0x7); +} + +void +CodeEmitterGK110::emitSLCT(const CmpInstruction *i) +{ + CondCode cc = i->setCond; + if (i->src(2).mod.neg()) + cc = reverseCondCode(cc); + + if (i->dType == TYPE_F32) { + emitForm_21(i, 0x1d0, 0xb50); + FTZ_(32); + emitCondCode(cc, 0x33, 0xf); + } else { + emitForm_21(i, 0x1a4, 0xb20); + emitCondCode(cc, 0x34, 0x7); + } +} + +void CodeEmitterGK110::emitSELP(const Instruction *i) +{ + emitForm_21(i, 0x250, 0x050); + + if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT))) + code[1] |= 1 << 13; +} + +void CodeEmitterGK110::emitTEXBAR(const Instruction *i) +{ + code[0] = 0x00000002 | (i->subOp << 23); + code[1] = 0x77000000; + + emitPredicate(i); +} + +void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i) +{ + emitNOP(i); // TODO +} + +static inline bool +isNextIndependentTex(const TexInstruction *i) +{ + if (!i->next || !isTextureOp(i->next->op)) + return false; + if (i->getDef(0)->interfers(i->next->getSrc(0))) + return false; + return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1)); +} + +void +CodeEmitterGK110::emitTEX(const TexInstruction *i) +{ + const bool ind = i->tex.rIndirectSrc >= 0; + + if (ind) { + code[0] = 0x00000002; + switch (i->op) { + case OP_TXD: + code[1] = 0x7e000000; + break; + default: + code[1] = 0x7d800000; + break; + } + } else { + switch (i->op) { + case OP_TXD: + code[0] = 0x00000002; + code[1] = 0x76000000; + break; + default: + code[0] = 0x00000001; + code[1] = 0x60000000; + break; + } + code[1] |= i->tex.r << 15; + } + + code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode + + // if (i->tex.liveOnly) + // ? + + switch (i->op) { + case OP_TEX: break; + case OP_TXB: code[1] |= 0x2000; break; + case OP_TXL: code[1] |= 0x3000; break; + case OP_TXF: break; // XXX + case OP_TXG: break; // XXX + case OP_TXD: break; + default: + assert(!"invalid texture op"); + break; + } + /* + if (i->op == OP_TXF) { + if (!i->tex.levelZero) + code[1] |= 0x02000000; + } else */ + if (i->tex.levelZero) { + code[1] |= 0x1000; + } + + // if (i->op != OP_TXD && i->tex.derivAll) + // code[1] |= 1 << 13; + + emitPredicate(i); + + code[1] |= i->tex.mask << 2; + + const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) + + defId(i->def(0), 2); + srcId(i->src(0), 10); + srcId(i, src1, 23); + + // if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5; + + // texture target: + code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7; + if (i->tex.target.isArray()) + code[1] |= 0x40; + // if (i->tex.target.isShadow()) + // ? + // if (i->tex.target == TEX_TARGET_2D_MS || + // i->tex.target == TEX_TARGET_2D_MS_ARRAY) + // ? + + if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) { + // ? + } + + // if (i->tex.useOffsets) + // ? +} + +void +CodeEmitterGK110::emitTXQ(const TexInstruction *i) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitFlow(const Instruction *i) +{ + const FlowInstruction *f = i->asFlow(); + + unsigned mask; // bit 0: predicate, bit 1: target + + code[0] = 0x00000000; + + switch (i->op) { + case OP_BRA: + code[1] = f->absolute ? 0x00000 : 0x12000000; // XXX + // if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) + // code[0] |= 0x4000; + mask = 3; + break; + case OP_CALL: + code[1] = f->absolute ? 0x00000 : 0x13000000; // XXX + // if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) + // code[0] |= 0x4000; + mask = 2; + break; + + case OP_EXIT: code[1] = 0x18000000; mask = 1; break; + case OP_RET: code[1] = 0x19000000; mask = 1; break; + case OP_DISCARD: code[1] = 0x19800000; mask = 1; break; // XXX: guess + case OP_BREAK: code[1] = 0x1a800000; mask = 1; break; // XXX: guess + case OP_CONT: code[1] = 0x1b000000; mask = 1; break; // XXX: guess + + case OP_JOINAT: code[1] = 0x14800000; mask = 2; break; + case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break; // XXX: guess + case OP_PRECONT: code[1] = 0x15800000; mask = 2; break; // XXX: guess + case OP_PRERET: code[1] = 0x16000000; mask = 2; break; // XXX: guess + + case OP_QUADON: code[1] = 0x1c000000; mask = 0; break; // XXX: guess + case OP_QUADPOP: code[1] = 0x1c800000; mask = 0; break; // XXX: guess + case OP_BRKPT: code[1] = 0x1d000000; mask = 0; break; // XXX: guess + default: + assert(!"invalid flow operation"); + return; + } + + if (mask & 1) { + emitPredicate(i); + if (i->flagsSrc < 0) + code[0] |= 0x3c; + } + + if (!f) + return; + + // TODO + /* + if (f->allWarp) + code[0] |= 1 << 15; + if (f->limit) + code[0] |= 1 << 16; + */ + + if (f->op == OP_CALL) { + if (f->builtin) { + assert(f->absolute); + uint32_t pcAbs = targ->getBuiltinOffset(f->target.builtin); + addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23); + addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9); + } else { + assert(!f->absolute); + int32_t pcRel = f->target.fn->binPos - (codeSize + 8); + code[0] |= (pcRel & 0x1ff) << 23; + code[1] |= (pcRel >> 9) & 0x7fff; + } + } else + if (mask & 2) { + int32_t pcRel = f->target.bb->binPos - (codeSize + 8); + // currently we don't want absolute branches + assert(!f->absolute); + code[0] |= (pcRel & 0x1ff) << 23; + code[1] |= (pcRel >> 9) & 0x7fff; + } +} + +void +CodeEmitterGK110::emitPFETCH(const Instruction *i) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitVFETCH(const Instruction *i) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitEXPORT(const Instruction *i) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitOUT(const Instruction *i) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitInterpMode(const Instruction *i) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitINTERP(const Instruction *i) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos) +{ + uint8_t n; + + switch (ty) { + case TYPE_U8: + n = 0; + break; + case TYPE_S8: + n = 1; + break; + case TYPE_U16: + n = 2; + break; + case TYPE_S16: + n = 3; + break; + case TYPE_F32: + case TYPE_U32: + case TYPE_S32: + n = 4; + break; + case TYPE_F64: + case TYPE_U64: + case TYPE_S64: + n = 5; + break; + case TYPE_B128: + n = 6; + break; + default: + n = 0; + assert(!"invalid ld/st type"); + break; + } + code[pos / 32] |= n << (pos % 32); +} + +void +CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos) +{ + uint8_t n; + + switch (c) { + case CACHE_CA: +// case CACHE_WB: + n = 0; + break; + case CACHE_CG: + n = 1; + break; + case CACHE_CS: + n = 2; + break; + case CACHE_CV: +// case CACHE_WT: + n = 3; + break; + default: + n = 0; + assert(!"invalid caching mode"); + break; + } + code[pos / 32] |= n << (pos % 32); +} + +void +CodeEmitterGK110::emitSTORE(const Instruction *i) +{ + int32_t offset = SDATA(i->src(0)).offset; + + switch (i->src(0).getFile()) { + case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break; + case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; + case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break; + default: + assert(!"invalid memory file"); + break; + } + + if (i->src(0).getFile() != FILE_MEMORY_GLOBAL) + offset &= 0xffffff; + + if (code[0] & 0x2) { + emitLoadStoreType(i->dType, 0x33); + if (i->src(0).getFile() == FILE_MEMORY_LOCAL) + emitCachingMode(i->cache, 0x2f); + } else { + emitLoadStoreType(i->dType, 0x38); + emitCachingMode(i->cache, 0x3b); + } + code[0] |= offset << 23; + code[1] |= offset >> 9; + + emitPredicate(i); + + srcId(i->src(1), 2); + srcId(i->src(0).getIndirect(0), 10); +} + +void +CodeEmitterGK110::emitLOAD(const Instruction *i) +{ + int32_t offset = SDATA(i->src(0)).offset; + + switch (i->src(0).getFile()) { + case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break; + case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break; + case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break; + case FILE_MEMORY_CONST: + if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { + emitMOV(i); + return; + } + offset &= 0xffff; + code[0] = 0x00000002; + code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7); + break; + default: + assert(!"invalid memory file"); + break; + } + + if (code[0] & 0x2) { + offset &= 0xffffff; + emitLoadStoreType(i->dType, 0x33); + if (i->src(0).getFile() == FILE_MEMORY_LOCAL) + emitCachingMode(i->cache, 0x2f); + } else { + emitLoadStoreType(i->dType, 0x38); + emitCachingMode(i->cache, 0x3b); + } + code[0] |= offset << 23; + code[1] |= offset >> 9; + + emitPredicate(i); + + defId(i->def(0), 2); + srcId(i->src(0).getIndirect(0), 10); +} + +uint8_t +CodeEmitterGK110::getSRegEncoding(const ValueRef& ref) +{ + switch (SDATA(ref).sv.sv) { + case SV_LANEID: return 0x00; + case SV_PHYSID: return 0x03; + case SV_VERTEX_COUNT: return 0x10; + case SV_INVOCATION_ID: return 0x11; + case SV_YDIR: return 0x12; + case SV_TID: return 0x21 + SDATA(ref).sv.index; + case SV_CTAID: return 0x25 + SDATA(ref).sv.index; + case SV_NTID: return 0x29 + SDATA(ref).sv.index; + case SV_GRIDID: return 0x2c; + case SV_NCTAID: return 0x2d + SDATA(ref).sv.index; + case SV_LBASE: return 0x34; + case SV_SBASE: return 0x30; + case SV_CLOCK: return 0x50 + SDATA(ref).sv.index; + default: + assert(!"no sreg for system value"); + return 0; + } +} + +void +CodeEmitterGK110::emitMOV(const Instruction *i) +{ + if (i->src(0).getFile() == FILE_SYSTEM_VALUE) { + code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23); + code[1] = 0x86400000; + emitPredicate(i); + defId(i->def(0), 2); + } else + if (i->src(0).getFile() == FILE_IMMEDIATE) { + code[0] = 0x00000002 | (i->lanes << 14); + code[1] = 0x74000000; + emitPredicate(i); + defId(i->def(0), 2); + setImmediate32(i, 0, Modifier(0)); + } else + if (i->src(0).getFile() == FILE_PREDICATE) { + // TODO + } else { + emitForm_C(i, 0x24c, 2); + code[1] |= i->lanes << 10; + } +} + +bool +CodeEmitterGK110::emitInstruction(Instruction *insn) +{ + const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8; + + if (insn->encSize != 8) { + ERROR("skipping unencodable instruction: "); + insn->print(); + return false; + } else + if (codeSize + size > codeSizeLimit) { + ERROR("code emitter output buffer too small\n"); + return false; + } + + if (writeIssueDelays) { + int id = (codeSize & 0x3f) / 8 - 1; + if (id < 0) { + id += 1; + code[0] = 0x00000000; // cf issue delay "instruction" + code[1] = 0x08000000; + code += 2; + codeSize += 8; + } + uint32_t *data = code - (id * 2 + 2); + + switch (id) { + case 0: data[0] |= insn->sched << 2; break; + case 1: data[0] |= insn->sched << 10; break; + case 2: data[0] |= insn->sched << 18; break; + case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break; + case 4: data[1] |= insn->sched << 2; + case 5: data[1] |= insn->sched << 10; break; + case 6: data[1] |= insn->sched << 18; break; + default: + assert(0); + break; + } + } + + // assert that instructions with multiple defs don't corrupt registers + for (int d = 0; insn->defExists(d); ++d) + assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0); + + switch (insn->op) { + case OP_MOV: + case OP_RDSV: + emitMOV(insn); + break; + case OP_NOP: + break; + case OP_LOAD: + emitLOAD(insn); + break; + case OP_STORE: + emitSTORE(insn); + break; + case OP_LINTERP: + case OP_PINTERP: + emitINTERP(insn); + break; + case OP_VFETCH: + emitVFETCH(insn); + break; + case OP_EXPORT: + emitEXPORT(insn); + break; + case OP_PFETCH: + emitPFETCH(insn); + break; + case OP_EMIT: + case OP_RESTART: + emitOUT(insn); + break; + case OP_ADD: + case OP_SUB: + if (isFloatType(insn->dType)) + emitFADD(insn); + else + emitUADD(insn); + break; + case OP_MUL: + if (isFloatType(insn->dType)) + emitFMUL(insn); + else + emitIMUL(insn); + break; + case OP_MAD: + case OP_FMA: + if (isFloatType(insn->dType)) + emitFMAD(insn); + else + emitIMAD(insn); + break; + case OP_SAD: + emitISAD(insn); + break; + case OP_NOT: + emitNOT(insn); + break; + case OP_AND: + emitLogicOp(insn, 0); + break; + case OP_OR: + emitLogicOp(insn, 1); + break; + case OP_XOR: + emitLogicOp(insn, 2); + break; + case OP_SHL: + case OP_SHR: + emitShift(insn); + break; + case OP_SET: + case OP_SET_AND: + case OP_SET_OR: + case OP_SET_XOR: + emitSET(insn->asCmp()); + break; + case OP_SELP: + emitSELP(insn); + break; + case OP_SLCT: + emitSLCT(insn->asCmp()); + break; + case OP_MIN: + case OP_MAX: + emitMINMAX(insn); + break; + case OP_ABS: + case OP_NEG: + case OP_CEIL: + case OP_FLOOR: + case OP_TRUNC: + case OP_CVT: + case OP_SAT: + emitCVT(insn); + break; + case OP_RSQ: + emitSFnOp(insn, 5); + break; + case OP_RCP: + emitSFnOp(insn, 4); + break; + case OP_LG2: + emitSFnOp(insn, 3); + break; + case OP_EX2: + emitSFnOp(insn, 2); + break; + case OP_SIN: + emitSFnOp(insn, 1); + break; + case OP_COS: + emitSFnOp(insn, 0); + break; + case OP_PRESIN: + case OP_PREEX2: + emitPreOp(insn); + break; + case OP_TEX: + case OP_TXB: + case OP_TXL: + case OP_TXD: + case OP_TXF: + emitTEX(insn->asTex()); + break; + case OP_TXQ: + emitTXQ(insn->asTex()); + break; + case OP_TEXBAR: + emitTEXBAR(insn); + break; + case OP_BRA: + case OP_CALL: + case OP_PRERET: + case OP_RET: + case OP_DISCARD: + case OP_EXIT: + case OP_PRECONT: + case OP_CONT: + case OP_PREBREAK: + case OP_BREAK: + case OP_JOINAT: + case OP_BRKPT: + case OP_QUADON: + case OP_QUADPOP: + emitFlow(insn); + break; + case OP_QUADOP: + emitQUADOP(insn, insn->subOp, insn->lanes); + break; + case OP_DFDX: + emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4); + break; + case OP_DFDY: + emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5); + break; + case OP_POPCNT: + emitPOPC(insn); + break; + case OP_JOIN: + emitNOP(insn); + insn->join = 1; + break; + case OP_PHI: + case OP_UNION: + case OP_CONSTRAINT: + ERROR("operation should have been eliminated"); + return false; + case OP_EXP: + case OP_LOG: + case OP_SQRT: + case OP_POW: + ERROR("operation should have been lowered\n"); + return false; + default: + ERROR("unknow op\n"); + return false; + } + + if (insn->join) + code[0] |= 1 << 22; + + code += 2; + codeSize += 8; + return true; +} + +uint32_t +CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const +{ + // No more short instruction encodings. + return 8; +} + +void +CodeEmitterGK110::prepareEmission(Function *func) +{ + const Target *targ = func->getProgram()->getTarget(); + + CodeEmitter::prepareEmission(func); + + if (targ->hasSWSched) + calculateSchedDataNVC0(targ, func); +} + +CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target) + : CodeEmitter(target), + writeIssueDelays(target->hasSWSched) +{ + code = NULL; + codeSize = codeSizeLimit = 0; + relocInfo = NULL; +} + +CodeEmitter * +TargetNVC0::createCodeEmitterGK110(Program::Type type) +{ + CodeEmitterGK110 *emit = new CodeEmitterGK110(this); + emit->setProgramType(type); + return emit; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp index 57d5d723c6a..cd04f4cfb97 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp @@ -2298,6 +2298,13 @@ SchedDataCalculator::recordRd(const Value *v, const int ready) } } +bool +calculateSchedDataNVC0(const Target *targ, Function *func) +{ + SchedDataCalculator sched(targ); + return sched.run(func, true, true); +} + void CodeEmitterNVC0::prepareEmission(Function *func) { @@ -2305,10 +2312,8 @@ CodeEmitterNVC0::prepareEmission(Function *func) CodeEmitter::prepareEmission(func); - if (targ->hasSWSched) { - SchedDataCalculator sched(targ); - sched.run(func, true, true); - } + if (targ->hasSWSched) + calculateSchedDataNVC0(targ, func); } CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target) @@ -2321,11 +2326,19 @@ CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target) } CodeEmitter * -TargetNVC0::getCodeEmitter(Program::Type type) +TargetNVC0::createCodeEmitterNVC0(Program::Type type) { CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this); emit->setProgramType(type); return emit; } +CodeEmitter * +TargetNVC0::getCodeEmitter(Program::Type type) +{ + if (chipset >= NVISA_GK110_CHIPSET) + return createCodeEmitterGK110(type); + return createCodeEmitterNVC0(type); +} + } // namespace nv50_ir diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp index efb51249115..749ace5d81c 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp @@ -157,7 +157,7 @@ private: const Instruction *recurseDef(const Instruction *); private: - LValue *r63; + LValue *rZero; const bool needTexBar; }; @@ -467,8 +467,8 @@ NVC0LegalizePostRA::visit(Function *fn) if (needTexBar) insertTextureBarriers(fn); - r63 = new_LValue(fn, FILE_GPR); - r63->reg.data.id = 63; + rZero = new_LValue(fn, FILE_GPR); + rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR); return true; } @@ -478,7 +478,7 @@ NVC0LegalizePostRA::replaceZero(Instruction *i) for (int s = 0; i->srcExists(s); ++s) { ImmediateValue *imm = i->getSrc(s)->asImm(); if (imm && imm->reg.data.u64 == 0) - i->setSrc(s, r63); + i->setSrc(s, rZero); } } @@ -556,7 +556,7 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) if (!i->getDef(0)->refCount()) i->setDef(0, NULL); if (i->src(0).getFile() == FILE_IMMEDIATE) - i->setSrc(0, r63); // initial value must be 0 + i->setSrc(0, rZero); // initial value must be 0 } else if (i->isNop()) { bb->remove(i); diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp index 0674f12704d..e3db4b2318d 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp @@ -334,7 +334,7 @@ TargetNVC0::getFileSize(DataFile file) const { switch (file) { case FILE_NULL: return 0; - case FILE_GPR: return 63; + case FILE_GPR: return (chipset >= NVISA_GK110_CHIPSET) ? 255 : 63; case FILE_PREDICATE: return 7; case FILE_FLAGS: return 1; case FILE_ADDRESS: return 0; @@ -392,7 +392,7 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s, { DataFile sf = ld->src(0).getFile(); - // immediate 0 can be represented by GPR $r63 + // immediate 0 can be represented by GPR $r63/$r255 if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0) return (!i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE); diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h index d859388dfdf..92a9a2a3c6a 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h @@ -38,6 +38,9 @@ public: virtual CodeEmitter *getCodeEmitter(Program::Type); + CodeEmitter *createCodeEmitterNVC0(Program::Type); + CodeEmitter *createCodeEmitterGK110(Program::Type); + virtual bool runLegalizePass(Program *, CGStage stage) const; virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const; @@ -64,7 +67,8 @@ public: private: void initOpInfo(); - }; +bool calculateSchedDataNVC0(const Target *, Function *); + } // namespace nv50_ir -- cgit v1.2.3