summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
diff options
context:
space:
mode:
authorIlia Mirkin <imirkin@alum.mit.edu>2017-02-04 22:31:04 -0500
committerIlia Mirkin <imirkin@alum.mit.edu>2017-02-09 12:57:48 -0500
commit1aefd6159c07cd5b646ce99afd96d4500020418a (patch)
treec60b7c400f5a67b19c2ccb2cf5d96b8e13492ce6 /src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
parent009c54aa7af1cc1c0eeb03939ce616957dab67fc (diff)
nvc0/ir: add support for all the new int64 tgsi opcodes
A few thoughts: - Some of that LegalizeSSA logic should really live much earlier and be subject to the likes of DCE and other useful passes - Some of the "lowering" done in from_tgsi should be done later so that proper optimization might be done. However this all works and the above can be improved upon later. Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp222
1 files changed, 221 insertions, 1 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 6320e529980..80cc7fa01ac 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -354,6 +354,14 @@ unsigned int Instruction::srcMask(unsigned int s) const
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSEQ:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_U642F:
switch (util_bitcount(mask)) {
case 1: return 0x3;
case 2: return 0xf;
@@ -557,6 +565,7 @@ nv50_ir::DataType Instruction::inferSrcType() const
case TGSI_OPCODE_SHL:
case TGSI_OPCODE_U2F:
case TGSI_OPCODE_U2D:
+ case TGSI_OPCODE_U2I64:
case TGSI_OPCODE_UADD:
case TGSI_OPCODE_UDIV:
case TGSI_OPCODE_UMOD:
@@ -587,6 +596,7 @@ nv50_ir::DataType Instruction::inferSrcType() const
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_I2I64:
case TGSI_OPCODE_IDIV:
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_IMAX:
@@ -608,6 +618,8 @@ nv50_ir::DataType Instruction::inferSrcType() const
case TGSI_OPCODE_D2F:
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
+ case TGSI_OPCODE_D2I64:
+ case TGSI_OPCODE_D2U64:
case TGSI_OPCODE_DABS:
case TGSI_OPCODE_DNEG:
case TGSI_OPCODE_DADD:
@@ -630,6 +642,34 @@ nv50_ir::DataType Instruction::inferSrcType() const
case TGSI_OPCODE_DFLR:
case TGSI_OPCODE_DROUND:
return nv50_ir::TYPE_F64;
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_U64MIN:
+ case TGSI_OPCODE_U64MAX:
+ case TGSI_OPCODE_U64ADD:
+ case TGSI_OPCODE_U64MUL:
+ case TGSI_OPCODE_U64SHL:
+ case TGSI_OPCODE_U64SHR:
+ case TGSI_OPCODE_U64DIV:
+ case TGSI_OPCODE_U64MOD:
+ case TGSI_OPCODE_U642F:
+ case TGSI_OPCODE_U642D:
+ return nv50_ir::TYPE_U64;
+ case TGSI_OPCODE_I64ABS:
+ case TGSI_OPCODE_I64SSG:
+ case TGSI_OPCODE_I64NEG:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_I64MIN:
+ case TGSI_OPCODE_I64MAX:
+ case TGSI_OPCODE_I64SHR:
+ case TGSI_OPCODE_I64DIV:
+ case TGSI_OPCODE_I64MOD:
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_I642D:
+ return nv50_ir::TYPE_S64;
default:
return nv50_ir::TYPE_F32;
}
@@ -650,17 +690,35 @@ nv50_ir::DataType Instruction::inferDstType() const
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
case TGSI_OPCODE_PK2H:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_U2F:
case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_U642F:
case TGSI_OPCODE_UP2H:
return nv50_ir::TYPE_F32;
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
case TGSI_OPCODE_F2D:
+ case TGSI_OPCODE_I642D:
+ case TGSI_OPCODE_U642D:
return nv50_ir::TYPE_F64;
+ case TGSI_OPCODE_I2I64:
+ case TGSI_OPCODE_U2I64:
+ case TGSI_OPCODE_F2I64:
+ case TGSI_OPCODE_D2I64:
+ return nv50_ir::TYPE_S64;
+ case TGSI_OPCODE_F2U64:
+ case TGSI_OPCODE_D2U64:
+ return nv50_ir::TYPE_U64;
default:
return inferSrcType();
}
@@ -676,6 +734,8 @@ nv50_ir::CondCode Instruction::getSetCond() const
case TGSI_OPCODE_USLT:
case TGSI_OPCODE_FSLT:
case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_U64SLT:
return CC_LT;
case TGSI_OPCODE_SLE:
return CC_LE;
@@ -684,6 +744,8 @@ nv50_ir::CondCode Instruction::getSetCond() const
case TGSI_OPCODE_USGE:
case TGSI_OPCODE_FSGE:
case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_U64SGE:
return CC_GE;
case TGSI_OPCODE_SGT:
return CC_GT;
@@ -691,10 +753,12 @@ nv50_ir::CondCode Instruction::getSetCond() const
case TGSI_OPCODE_USEQ:
case TGSI_OPCODE_FSEQ:
case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_U64SEQ:
return CC_EQ;
case TGSI_OPCODE_SNE:
case TGSI_OPCODE_FSNE:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_U64SNE:
return CC_NEU;
case TGSI_OPCODE_USNE:
return CC_NE;
@@ -832,6 +896,35 @@ static nv50_ir::operation translateOpcode(uint opcode)
NV50_IR_OPCODE_CASE(DFLR, FLOOR);
NV50_IR_OPCODE_CASE(DROUND, CVT);
+ NV50_IR_OPCODE_CASE(U64SEQ, SET);
+ NV50_IR_OPCODE_CASE(U64SNE, SET);
+ NV50_IR_OPCODE_CASE(U64SLT, SET);
+ NV50_IR_OPCODE_CASE(U64SGE, SET);
+ NV50_IR_OPCODE_CASE(I64SLT, SET);
+ NV50_IR_OPCODE_CASE(I64SGE, SET);
+ NV50_IR_OPCODE_CASE(I2I64, CVT);
+ NV50_IR_OPCODE_CASE(U2I64, CVT);
+ NV50_IR_OPCODE_CASE(F2I64, CVT);
+ NV50_IR_OPCODE_CASE(F2U64, CVT);
+ NV50_IR_OPCODE_CASE(D2I64, CVT);
+ NV50_IR_OPCODE_CASE(D2U64, CVT);
+ NV50_IR_OPCODE_CASE(I642F, CVT);
+ NV50_IR_OPCODE_CASE(U642F, CVT);
+ NV50_IR_OPCODE_CASE(I642D, CVT);
+ NV50_IR_OPCODE_CASE(U642D, CVT);
+
+ NV50_IR_OPCODE_CASE(I64MIN, MIN);
+ NV50_IR_OPCODE_CASE(U64MIN, MIN);
+ NV50_IR_OPCODE_CASE(I64MAX, MAX);
+ NV50_IR_OPCODE_CASE(U64MAX, MAX);
+ NV50_IR_OPCODE_CASE(I64ABS, ABS);
+ NV50_IR_OPCODE_CASE(I64NEG, NEG);
+ NV50_IR_OPCODE_CASE(U64ADD, ADD);
+ NV50_IR_OPCODE_CASE(U64MUL, MUL);
+ NV50_IR_OPCODE_CASE(U64SHL, SHL);
+ NV50_IR_OPCODE_CASE(I64SHR, SHR);
+ NV50_IR_OPCODE_CASE(U64SHR, SHR);
+
NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
@@ -3721,6 +3814,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
case TGSI_OPCODE_INTERP_OFFSET:
handleINTERP(dst0);
break;
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_U642F:
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
case TGSI_OPCODE_D2F: {
@@ -3737,16 +3832,79 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
}
break;
}
+ case TGSI_OPCODE_I2I64:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ dst0[c] = fetchSrc(0, c / 2);
+ mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31));
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_U2I64:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ dst0[c] = fetchSrc(0, c / 2);
+ dst0[c + 1] = zero;
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_F2I64:
+ case TGSI_OPCODE_F2U64:
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
case TGSI_OPCODE_F2D:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
Value *dreg = getSSA(8);
- mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
+ Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
+ if (!isFloatType(dstTy))
+ cvt->rnd = ROUND_Z;
mkSplit(&dst0[c], 4, dreg);
c++;
}
break;
+ case TGSI_OPCODE_D2I64:
+ case TGSI_OPCODE_D2U64:
+ case TGSI_OPCODE_I642D:
+ case TGSI_OPCODE_U642D:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0);
+ if (!isFloatType(dstTy))
+ cvt->rnd = ROUND_Z;
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_I64NEG:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ mkOp2(OP_SUB, dstTy, dst, zero, src0);
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_I64ABS:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *neg = getSSA(8), *srcComp[2], *negComp[2];
+ srcComp[0] = fetchSrc(0, c);
+ srcComp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]);
+ mkOp2(OP_SUB, dstTy, neg, zero, src0);
+ mkSplit(negComp, 4, neg);
+ mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32,
+ negComp[0], srcComp[0], srcComp[1]);
+ mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32,
+ negComp[1], srcComp[1], srcComp[1]);
+ c++;
+ }
+ break;
case TGSI_OPCODE_DABS:
case TGSI_OPCODE_DNEG:
case TGSI_OPCODE_DRCP:
@@ -3779,6 +3937,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
c++;
}
break;
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSEQ:
@@ -3800,6 +3964,46 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
}
break;
}
+ case TGSI_OPCODE_U64MIN:
+ case TGSI_OPCODE_U64MAX:
+ case TGSI_OPCODE_I64MIN:
+ case TGSI_OPCODE_I64MAX: {
+ dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32;
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ Value *flag = getSSA(1, FILE_FLAGS);
+ src0 = fetchSrc(0, c + 1);
+ src1 = fetchSrc(1, c + 1);
+ geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1);
+ geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH;
+ geni->setFlagsDef(1, flag);
+
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1);
+ geni->subOp = NV50_IR_SUBOP_MINMAX_LOW;
+ geni->setFlagsSrc(2, flag);
+
+ c++;
+ }
+ break;
+ }
+ case TGSI_OPCODE_U64SHL:
+ case TGSI_OPCODE_I64SHR:
+ case TGSI_OPCODE_U64SHR:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+ src1 = fetchSrc(1, c / 2);
+ mkOp2(op, dstTy, dst, src0, src1);
+ mkSplit(&dst0[c], 4, dst);
+ c++;
+ }
+ break;
+ case TGSI_OPCODE_U64ADD:
+ case TGSI_OPCODE_U64MUL:
case TGSI_OPCODE_DADD:
case TGSI_OPCODE_DMUL:
case TGSI_OPCODE_DDIV:
@@ -3873,6 +4077,22 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
mkSplit(&dst0[c], 4, dst);
c++;
}
+ case TGSI_OPCODE_I64SSG:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8);
+ Value *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+
+ val0 = getScratch();
+ val1 = getScratch();
+ mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero);
+ mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero);
+ mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
+ mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31));
+ c++;
+ }
break;
default:
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());