summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ilia Mirkin <imirkin@alum.mit.edu>, 2021-03-20 23:40:00 -0400
committer: Marge Bot <eric+marge@anholt.net>, 2021-05-01 20:04:21 +0000
commit: b53b96a86a13cc321ef8812b9ec96c2c229e8e5c (patch)
tree: 5670bcdc202eecfa52ba81d0c92db7633d1605aa
parent: f451854f39f580c6c95a574428498e32ffb6e840 (diff)
nv50: add support for doing membars
This requires an address that's safe to read from.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10164>
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 3
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp | 39
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 7
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp | 5
-rw-r--r-- src/gallium/drivers/nouveau/nv50/nv50_context.h | 6
-rw-r--r-- src/gallium/drivers/nouveau/nv50/nv50_program.c | 3
-rw-r--r-- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 6
7 files changed, 63 insertions, 6 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 8bf0809fa10..83aae296fbb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -125,6 +125,9 @@ struct nv50_ir_prog_info
uint8_t msInfoCBSlot; /* cX[] used for multisample info */
uint16_t msInfoBase; /* base address for multisample info */
uint16_t uboInfoBase; /* base address for compute UBOs (gk104+) */
+
+ uint16_t membarOffset; /* base address for membar reads (nv50) */
+ uint8_t gmemMembar; /* gX[] on which to perform membar reads (nv50) */
} io;
/* driver callback to assign input/output locations */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 9ebb780bfff..2b09855b19f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -647,6 +647,7 @@ private:
bool handleEXPORT(Instruction *);
bool handleLOAD(Instruction *);
bool handleLDST(Instruction *);
+ bool handleMEMBAR(Instruction *);
bool handleSharedATOM(Instruction *);
bool handleSULDP(TexInstruction *);
bool handleSUREDP(TexInstruction *);
@@ -1619,6 +1620,42 @@ NV50LoweringPreSSA::handleLDST(Instruction *i)
return true;
}
+bool
+NV50LoweringPreSSA::handleMEMBAR(Instruction *i)
+{ // Lowers an OP_MEMBAR in place: emits extra global loads for the GL case, then turns the op into a BAR. Always returns true.
+ // For global memory, apparently doing a bunch of reads at different
+ // addresses forces things to get sufficiently flushed.
+ if (i->subOp & NV50_IR_SUBOP_MEMBAR_GL) {
+ uint8_t b = prog->driver->io.auxCBSlot; // constant buffer slot that holds the membar base value
+ Value *base =
+ bld.mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32,
+ prog->driver->io.membarOffset), NULL); // 32-bit load from c[b] at io.membarOffset
+ Value *physid = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getSSA(), bld.mkSysVal(SV_PHYSID, 0));
+ Value *off = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+ bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(),
+ physid, bld.loadImm(NULL, 0x1f)),
+ bld.loadImm(NULL, 2)); // off = (physid & 0x1f) << 2; presumably one 32-bit slot per id -- TODO confirm
+ base = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), base, off);
+ Symbol *gmemMembar = bld.mkSymbol(FILE_MEMORY_GLOBAL, prog->driver->io.gmemMembar, TYPE_U32, 0); // g[io.gmemMembar], the global space the reads go through
+ for (int i = 0; i < 8; i++) { // NOTE(review): this counter shadows the Instruction *i parameter inside the loop; consider renaming it
+ if (i != 0) {
+ base = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), base, bld.loadImm(NULL, 0x100)); // every load after the first is 0x100 past the previous address
+ }
+ bld.mkLoad(TYPE_U32, bld.getSSA(), gmemMembar, base)
+ ->fixed = 1; // result is unused; fixed marks the load so it is kept (the nv50_ir_ra.cpp change in this commit adds a NOP use for such loads)
+ }
+ }
+
+ // Both global and shared memory barriers also need a regular control bar
+ // TODO: double-check this is the case
+ i->op = OP_BAR; // here i is the Instruction parameter again (the loop counter is out of scope)
+ i->subOp = NV50_IR_SUBOP_BAR_SYNC;
+ i->setSrc(0, bld.mkImm(0u)); // both BAR sources are set to immediate 0
+ i->setSrc(1, bld.mkImm(0u));
+
+ return true;
+}
+
// The type that bests represents how each component can be stored when packed.
static DataType
getPackedType(const TexInstruction::ImgFormatDesc *t, int c)
@@ -2188,6 +2225,8 @@ NV50LoweringPreSSA::visit(Instruction *i)
return handleEXPORT(i);
case OP_LOAD:
return handleLOAD(i);
+ case OP_MEMBAR:
+ return handleMEMBAR(i);
case OP_ATOM:
case OP_STORE:
return handleLDST(i);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 6a6ad01ce40..f4340014396 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -2569,6 +2569,13 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
addHazard(i, i->src(0).getIndirect(0));
if (i->src(0).isIndirect(1) && typeSizeof(i->dType) >= 8)
addHazard(i, i->src(0).getIndirect(1));
+ if (i->op == OP_LOAD && i->fixed && targ->getChipset() < 0xc0) {
+ // Add a hazard to make sure we keep the op around. These are used
+ // for membars.
+ Instruction *nop = new_Instruction(func, OP_NOP, i->dType);
+ nop->setSrc(0, i->getDef(0));
+ i->bb->insertAfter(i, nop);
+ }
} else
if (i->op == OP_UNION ||
i->op == OP_MERGE ||
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index ccb14535b55..e5a3a995505 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -297,11 +297,6 @@ CodeEmitter::prepareEmission(BasicBlock *bb)
for (i = bb->getEntry(); i; i = next) {
next = i->next;
- if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
- bb->remove(i);
- continue;
- }
-
i->encSize = getMinEncodingSize(i);
if (next && i->encSize < 8)
++nShort;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index af8a290db71..ed89d54afe4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -123,7 +123,11 @@
/* Compute buffer info: 16 surfaces, 12 32-bit integers each */
#define NV50_CB_AUX_BUF_INFO(i) (0x3c4 + (i) * 12 * 4)
#define NV50_CB_AUX_BUF_SIZE (NV50_MAX_GLOBALS * 12 * 4)
-/* next spot: 0x644 */
+/* Compute membar mapped area */
+#define NV50_CB_AUX_MEMBAR_OFFSET 0x6c4
+/* next spot: 0x6c8 */
+/* 0x800 from the end for compute shader membars, reads only. */
+#define NV50_CB_AUX_MEMBAR (NV50_CB_AUX_SIZE - 0x800)
/* 4 32-bit floats for the vertex runout, put at the end */
#define NV50_CB_AUX_RUNOUT_OFFSET (NV50_CB_AUX_SIZE - 0x10)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index d818d21ef19..90349a92b01 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -364,6 +364,9 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
+ info->io.membarOffset = NV50_CB_AUX_MEMBAR_OFFSET;
+ info->io.gmemMembar = 15;
+
info->assignSlots = nv50_program_assign_varying_slots;
prog->vp.bfc[0] = 0xff;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 2dffde2b2ad..7948a6d37cc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -797,6 +797,12 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
PUSH_DATAh(push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
PUSH_DATA (push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
+ /* set the membar offset */
+ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+ PUSH_DATA (push, (NV50_CB_AUX_MEMBAR_OFFSET << (8 - 2)) | NV50_CB_AUX);
+ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 1);
+ PUSH_DATA (push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_MEMBAR_OFFSET);
+
nv50_upload_ms_info(push);
/* max TIC (bits 4:8) & TSC bindings, per program type */