summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2018-06-14 19:56:28 -0600
committerBrian Paul <brianp@vmware.com>2018-06-14 20:09:45 -0600
commit66ca7e400b8cf736943feddafef7f76adabf9120 (patch)
treea338e1c9fd3ced8b144bedbbebcba8fd52c91135 /src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
parent9f217facbde04dd005b3f6b53bc97480b856d246 (diff)
nvc0: add support for programmable sample locations
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp102
1 files changed, 92 insertions, 10 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 29f674b4514..5723847234e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2662,17 +2662,33 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
break;
case SV_SAMPLE_POS: {
- Value *off = new_LValue(func, FILE_GPR);
- ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
+ Value *sampleID = bld.getScratch();
+ ld = bld.mkOp1(OP_PIXLD, TYPE_U32, sampleID, bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
- bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3));
- bld.mkLoad(TYPE_F32,
- i->getDef(0),
- bld.mkSymbol(
- FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
- TYPE_U32, prog->driver->io.sampleInfoBase +
- 4 * sym->reg.data.sv.index),
- off);
+ Value *offset = calculateSampleOffset(sampleID);
+
+ assert(prog->driver->prop.fp.readsSampleLocations);
+
+ if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
+ bld.mkLoad(TYPE_F32,
+ i->getDef(0),
+ bld.mkSymbol(
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, prog->driver->io.sampleInfoBase),
+ offset);
+ bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0),
+ bld.mkImm(0x040c + sym->reg.data.sv.index * 16));
+ bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_U32, i->getDef(0));
+ bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), bld.mkImm(1.0f / 16.0f));
+ } else {
+ bld.mkLoad(TYPE_F32,
+ i->getDef(0),
+ bld.mkSymbol(
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, prog->driver->io.sampleInfoBase +
+ 4 * sym->reg.data.sv.index),
+ offset);
+ }
break;
}
case SV_SAMPLE_MASK: {
@@ -2832,6 +2848,69 @@ NVC0LoweringPass::handleOUT(Instruction *i)
return true;
}
+Value *
+NVC0LoweringPass::calculateSampleOffset(Value *sampleID)
+{
+ Value *offset = bld.getScratch();
+ if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
+ // Sample location offsets (in bytes) are calculated like so:
+ // offset = (SV_POSITION.y % 4 * 2) + (SV_POSITION.x % 2)
+ // offset = offset * 32 + sampleID % 8 * 4;
+ // which is equivalent to:
+ // offset = (SV_POSITION.y & 0x3) << 6 + (SV_POSITION.x & 0x1) << 5;
+ // offset += sampleID << 2
+
+ // The second operand (src1) of the INSBF instructions are like so:
+ // 0xssll where ss is the size and ll is the offset.
+ // so: dest = src2 | (src0 & (1 << ss - 1)) << ll
+
+ // Add sample ID (offset = (sampleID & 0x7) << 2)
+ bld.mkOp3(OP_INSBF, TYPE_U32, offset, sampleID, bld.mkImm(0x0302), bld.mkImm(0x0));
+
+ Symbol *xSym = bld.mkSysVal(SV_POSITION, 0);
+ Symbol *ySym = bld.mkSysVal(SV_POSITION, 1);
+ Value *coord = bld.getScratch();
+
+ // Add X coordinate (offset |= (SV_POSITION.x & 0x1) << 5)
+ bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
+ targ->getSVAddress(FILE_SHADER_INPUT, xSym), NULL);
+ bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
+ ->rnd = ROUND_ZI;
+ bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0105), offset);
+
+ // Add Y coordinate (offset |= (SV_POSITION.y & 0x3) << 6)
+ bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
+ targ->getSVAddress(FILE_SHADER_INPUT, ySym), NULL);
+ bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
+ ->rnd = ROUND_ZI;
+ bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0206), offset);
+ } else {
+ bld.mkOp2(OP_SHL, TYPE_U32, offset, sampleID, bld.mkImm(3));
+ }
+ return offset;
+}
+
+// Handle programmable sample locations for GM20x+
+void
+NVC0LoweringPass::handlePIXLD(Instruction *i)
+{
+ if (i->subOp != NV50_IR_SUBOP_PIXLD_OFFSET)
+ return;
+ if (targ->getChipset() < NVISA_GM200_CHIPSET)
+ return;
+
+ assert(prog->driver->prop.fp.readsSampleLocations);
+
+ bld.mkLoad(TYPE_F32,
+ i->getDef(0),
+ bld.mkSymbol(
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, prog->driver->io.sampleInfoBase),
+ calculateSampleOffset(i->getSrc(0)));
+
+ bld.getBB()->remove(i);
+}
+
// Generate a binary predicate if an instruction is predicated by
// e.g. an f32 value.
void
@@ -2931,6 +3010,9 @@ NVC0LoweringPass::visit(Instruction *i)
case OP_BUFQ:
handleBUFQ(i);
break;
+ case OP_PIXLD:
+ handlePIXLD(i);
+ break;
default:
break;
}