summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/nouveau/codegen
diff options
context:
space:
mode:
author Rhys Perry <pendingchaos02@gmail.com> 2018-07-06 21:21:28 +0100
committer Karol Herbst <kherbst@redhat.com> 2018-07-07 20:31:56 +0200
commit f2cc694d8ee067467c946999138637b00a7a6158 (patch)
tree 6bf6bc3545a74b2cad186f9823e19ba8ce56fd61 /src/gallium/drivers/nouveau/codegen
parent 6e885611565cc043c82417762a1e696c4f516e04 (diff)
nvc0/ir: use the combined tid special register
total instructions in shared programs : 5804448 -> 5804690 (0.00%)
total gprs used in shared programs    : 670065 -> 670065 (0.00%)
total shared used in shared programs  : 548832 -> 548832 (0.00%)
total local used in shared programs   : 21068 -> 21068 (0.00%)

                local     shared        gpr       inst      bytes
    helped          0          0          0          5          5
      hurt          0          0          0        191        191

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir.h 1
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp 1
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 1
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 1
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp 3
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 12
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 40
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 1
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp 1
9 files changed, 61 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index f4f3c708886..0b220cc48de 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -453,6 +453,7 @@ enum SVSemantic
SV_TESS_INNER,
SV_TESS_COORD,
SV_TID,
+ SV_COMBINED_TID,
SV_CTAID,
SV_NTID,
SV_GRIDID,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 647d1a5d0ef..2118c3153f7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -2297,6 +2297,7 @@ CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
case SV_INVOCATION_ID: return 0x11;
case SV_YDIR: return 0x12;
case SV_THREAD_KILL: return 0x13;
+ case SV_COMBINED_TID: return 0x20;
case SV_TID: return 0x21 + SDATA(ref).sv.index;
case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
case SV_NTID: return 0x29 + SDATA(ref).sv.index;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 26826d63606..694d1b10a3c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -267,6 +267,7 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val)
case SV_INVOCATION_ID : id = 0x11; break;
case SV_THREAD_KILL : id = 0x13; break;
case SV_INVOCATION_INFO: id = 0x1d; break;
+ case SV_COMBINED_TID : id = 0x20; break;
case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
case SV_LANEMASK_EQ : id = 0x38; break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index d85fdda56ff..b6e35dd0ee4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1990,6 +1990,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
case SV_INVOCATION_ID: return 0x11;
case SV_YDIR: return 0x12;
case SV_THREAD_KILL: return 0x13;
+ case SV_COMBINED_TID: return 0x20;
case SV_TID: return 0x21 + SDATA(ref).sv.index;
case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
case SV_NTID: return 0x29 + SDATA(ref).sv.index;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 36ab837f6e2..1f0fd466a99 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -1201,6 +1201,9 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i)
bld.mkMov(def, bld.mkImm(0));
}
break;
+ case SV_COMBINED_TID:
+ bld.mkMov(def, tid);
+ break;
case SV_SAMPLE_POS: {
Value *off = new_LValue(func, FILE_ADDRESS);
bld.mkOp1(OP_RDSV, TYPE_U32, def, bld.mkSysVal(SV_SAMPLE_INDEX, 0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 597dcdffbe2..1410cf26c87 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2576,6 +2576,18 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
// TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID
i->op = OP_MOV;
i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0));
+ } else
+ if (sv == SV_TID) {
+ // Help CSE combine TID fetches
+ Value *tid = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(),
+ bld.mkSysVal(SV_COMBINED_TID, 0));
+ i->op = OP_EXTBF;
+ i->setSrc(0, tid);
+ switch (sym->reg.data.sv.index) {
+ case 0: i->setSrc(1, bld.mkImm(0x1000)); break;
+ case 1: i->setSrc(1, bld.mkImm(0x0a10)); break;
+ case 2: i->setSrc(1, bld.mkImm(0x061a)); break;
+ }
}
if (sv == SV_VERTEX_COUNT) {
bld.setPosition(i, true);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 39177bd044b..e0faf8501bf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1654,6 +1654,7 @@ ModifierFolding::visit(BasicBlock *bb)
// SLCT(a, b, const) -> cc(const) ? a : b
// RCP(RCP(a)) -> a
// MUL(MUL(a, b), const) -> MUL_Xconst(a, b)
+// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID)
class AlgebraicOpt : public Pass
{
private:
@@ -1671,6 +1672,7 @@ private:
void handleCVT_EXTBF(Instruction *);
void handleSUCLAMP(Instruction *);
void handleNEG(Instruction *);
+ void handleEXTBF_RDSV(Instruction *);
BuildUtil bld;
};
@@ -2175,6 +2177,41 @@ AlgebraicOpt::handleNEG(Instruction *i) {
}
}
+// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID)
+//
+// Rewrites an EXTBF whose first source is defined by RDSV(SV_COMBINED_TID)
+// into a direct RDSV(SV_TID, index). The rewrite only happens when the RDSV
+// result has no other references (refCount() <= 1) and when source 1 is one
+// of the three immediates that NVC0LoweringPass::handleRDSV attaches:
+//    0x1000 -> index 0,  0x0a10 -> index 1,  0x061a -> index 2
+// NOTE(review): the immediates look like (width << 8) | offset bitfield
+// descriptors -- confirm against the EXTBF encoding.
+// Any instruction that does not match this exact pattern is left untouched.
+void
+AlgebraicOpt::handleEXTBF_RDSV(Instruction *i)
+{
+   Instruction *rdsv = i->getSrc(0)->getUniqueInsn();
+   // NOTE(review): getUniqueInsn() presumably yields NULL when the source has
+   // no unique defining instruction; bail out rather than dereference it.
+   if (!rdsv)
+      return;
+   if (rdsv->op != OP_RDSV ||
+       rdsv->getSrc(0)->asSym()->reg.data.sv.sv != SV_COMBINED_TID)
+      return;
+   // Avoid creating more RDSV instructions
+   if (rdsv->getDef(0)->refCount() > 1)
+      return;
+
+   // The bitfield descriptor must be a known immediate to pick a component.
+   ImmediateValue imm;
+   if (!i->src(1).getImmediate(imm))
+      return;
+
+   int index;
+   if (imm.isInteger(0x1000))
+      index = 0;
+   else
+   if (imm.isInteger(0x0a10))
+      index = 1;
+   else
+   if (imm.isInteger(0x061a))
+      index = 2;
+   else
+      return;
+
+   bld.setPosition(i, false);
+
+   // Turn the EXTBF itself into the RDSV and drop its second source.
+   i->op = OP_RDSV;
+   i->setSrc(0, bld.mkSysVal(SV_TID, index));
+   i->setSrc(1, NULL);
+}
+
bool
AlgebraicOpt::visit(BasicBlock *bb)
{
@@ -2215,6 +2252,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
case OP_NEG:
handleNEG(i);
break;
+ case OP_EXTBF:
+ handleEXTBF_RDSV(i);
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index cbb21f5f721..ee3506fbaee 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -306,6 +306,7 @@ static const char *SemanticStr[SV_LAST + 1] =
"TESS_INNER",
"TESS_COORD",
"TID",
+ "COMBINED_TID",
"CTAID",
"NTID",
"GRIDID",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index dc73231394a..1ad3467337c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -257,6 +257,7 @@ TargetNV50::getSVAddress(DataFile shaderFile, const Symbol *sym) const
case SV_NTID:
return 0x2 + 2 * sym->reg.data.sv.index;
case SV_TID:
+ case SV_COMBINED_TID:
return 0;
case SV_SAMPLE_POS:
return 0; /* sample position is handled differently */