From f2cc694d8ee067467c946999138637b00a7a6158 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 6 Jul 2018 21:21:28 +0100 Subject: nvc0/ir: use the combined tid special register total instructions in shared programs : 5804448 -> 5804690 (0.00%) total gprs used in shared programs : 670065 -> 670065 (0.00%) total shared used in shared programs : 548832 -> 548832 (0.00%) total local used in shared programs : 21068 -> 21068 (0.00%) local shared gpr inst bytes helped 0 0 0 5 5 hurt 0 0 0 191 191 Signed-off-by: Rhys Perry Reviewed-by: Karol Herbst --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 1 + .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 1 + .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 1 + .../nouveau/codegen/nv50_ir_lowering_nv50.cpp | 3 ++ .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 12 +++++++ .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 40 ++++++++++++++++++++++ .../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + .../nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + 9 files changed, 61 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index f4f3c708886..0b220cc48de 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -453,6 +453,7 @@ enum SVSemantic SV_TESS_INNER, SV_TESS_COORD, SV_TID, + SV_COMBINED_TID, SV_CTAID, SV_NTID, SV_GRIDID, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 647d1a5d0ef..2118c3153f7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -2297,6 +2297,7 @@ CodeEmitterGK110::getSRegEncoding(const ValueRef& ref) case SV_INVOCATION_ID: return 0x11; case SV_YDIR: return 0x12; case SV_THREAD_KILL: return 0x13; + case SV_COMBINED_TID: return 0x20; case SV_TID: return 0x21 + SDATA(ref).sv.index; case SV_CTAID: return 0x25 + SDATA(ref).sv.index; case SV_NTID: return 0x29 + SDATA(ref).sv.index; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 26826d63606..694d1b10a3c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -267,6 +267,7 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val) case SV_INVOCATION_ID : id = 0x11; break; case SV_THREAD_KILL : id = 0x13; break; case SV_INVOCATION_INFO: id = 0x1d; break; + case SV_COMBINED_TID : id = 0x20; break; case SV_TID : id = 0x21 + val->reg.data.sv.index; break; case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break; case SV_LANEMASK_EQ : id = 0x38; break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index d85fdda56ff..b6e35dd0ee4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1990,6 +1990,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref) case SV_INVOCATION_ID: return 0x11; case SV_YDIR: return 0x12; case SV_THREAD_KILL: return 0x13; + case SV_COMBINED_TID: return 0x20; case SV_TID: return 0x21 + SDATA(ref).sv.index; case SV_CTAID: return 0x25 + SDATA(ref).sv.index; case SV_NTID: return 0x29 + SDATA(ref).sv.index; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp index 36ab837f6e2..1f0fd466a99 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -1201,6 +1201,9 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i) bld.mkMov(def, bld.mkImm(0)); } break; + case SV_COMBINED_TID: + bld.mkMov(def, tid); + break; case SV_SAMPLE_POS: { Value *off = new_LValue(func, FILE_ADDRESS); bld.mkOp1(OP_RDSV, TYPE_U32, def, bld.mkSysVal(SV_SAMPLE_INDEX, 0)); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 597dcdffbe2..1410cf26c87 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2576,6 +2576,18 @@ NVC0LoweringPass::handleRDSV(Instruction *i) // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID i->op = OP_MOV; i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0)); + } else + if (sv == SV_TID) { + // Help CSE combine TID fetches + Value *tid = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), + bld.mkSysVal(SV_COMBINED_TID, 0)); + i->op = OP_EXTBF; + i->setSrc(0, tid); + switch (sym->reg.data.sv.index) { + case 0: i->setSrc(1, bld.mkImm(0x1000)); break; + case 1: i->setSrc(1, bld.mkImm(0x0a10)); break; + case 2: i->setSrc(1, bld.mkImm(0x061a)); break; + } } if (sv == SV_VERTEX_COUNT) { bld.setPosition(i, true); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 39177bd044b..e0faf8501bf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1654,6 +1654,7 @@ ModifierFolding::visit(BasicBlock *bb) // SLCT(a, b, const) -> cc(const) ? a : b // RCP(RCP(a)) -> a // MUL(MUL(a, b), const) -> MUL_Xconst(a, b) +// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID) class AlgebraicOpt : public Pass { private: @@ -1671,6 +1672,7 @@ private: void handleCVT_EXTBF(Instruction *); void handleSUCLAMP(Instruction *); void handleNEG(Instruction *); + void handleEXTBF_RDSV(Instruction *); BuildUtil bld; }; @@ -2175,6 +2177,41 @@ AlgebraicOpt::handleNEG(Instruction *i) { } } +// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID) +void +AlgebraicOpt::handleEXTBF_RDSV(Instruction *i) +{ + Instruction *rdsv = i->getSrc(0)->getUniqueInsn(); + if (rdsv->op != OP_RDSV || + rdsv->getSrc(0)->asSym()->reg.data.sv.sv != SV_COMBINED_TID) + return; + // Avoid creating more RDSV instructions + if (rdsv->getDef(0)->refCount() > 1) + return; + + ImmediateValue imm; + if (!i->src(1).getImmediate(imm)) + return; + + int index; + if (imm.isInteger(0x1000)) + index = 0; + else + if (imm.isInteger(0x0a10)) + index = 1; + else + if (imm.isInteger(0x061a)) + index = 2; + else + return; + + bld.setPosition(i, false); + + i->op = OP_RDSV; + i->setSrc(0, bld.mkSysVal(SV_TID, index)); + i->setSrc(1, NULL); +} + bool AlgebraicOpt::visit(BasicBlock *bb) { @@ -2215,6 +2252,9 @@ AlgebraicOpt::visit(BasicBlock *bb) case OP_NEG: handleNEG(i); break; + case OP_EXTBF: + handleEXTBF_RDSV(i); + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index cbb21f5f721..ee3506fbaee 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -306,6 +306,7 @@ static const char *SemanticStr[SV_LAST + 1] = "TESS_INNER", "TESS_COORD", "TID", + "COMBINED_TID", "CTAID", "NTID", "GRIDID", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index dc73231394a..1ad3467337c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -257,6 +257,7 @@ TargetNV50::getSVAddress(DataFile shaderFile, const Symbol *sym) const case SV_NTID: return 0x2 + 2 * sym->reg.data.sv.index; case SV_TID: + case SV_COMBINED_TID: return 0; case SV_SAMPLE_POS: return 0; /* sample position is handled differently */ -- cgit v1.2.3