summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBen Skeggs <bskeggs@redhat.com>2020-06-07 09:51:51 +1000
committerMarge Bot <eric+marge@anholt.net>2020-06-10 22:52:41 +0000
commit60b28f7a5031324469a751cfbf9567204c4fc313 (patch)
tree5961832c7ee62b496fef9f0237871c02bcfa25cb /src
parentddedfcdf2116396f7630a4604667f946be64c588 (diff)
nvir: introduce OP_BREV with lowering to EXTBF_REV for current GPUs
SM70 has this instruction, but no BFE. Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.h1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp6
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp9
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp11
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp6
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp6
8 files changed, 29 insertions, 12 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 5b8595c961e..e4bbc8edafb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -152,6 +152,7 @@ enum operation
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
OP_BFIND, // find highest/lowest set bit
+ OP_BREV, // bitfield reverse
OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
OP_ATOM,
OP_BAR, // execution barrier, sources = { id, thread count, predicate }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 50c8345e4b6..42731f32f92 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1910,7 +1910,7 @@ Converter::visit(nir_intrinsic_instr *insn)
if (op == nir_intrinsic_read_first_invocation) {
mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
- mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
} else
tmp = getSrc(&insn->src[1], 0);
@@ -2794,14 +2794,14 @@ Converter::visit(nir_alu_instr *insn)
case nir_op_bitfield_reverse: {
DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest);
- mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
break;
}
case nir_op_find_lsb: {
DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest);
Value *tmp = getSSA();
- mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 60f3d582a0b..3fd76f64de0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -3401,8 +3401,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
// ReadInvocationARB(src, findLSB(ballot(true)))
val0 = getScratch();
mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
- mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
- ->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, val0, val0);
mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
src1 = val0;
/* fallthrough */
@@ -3820,8 +3819,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
val0 = getScratch();
- geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
- geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, val0, src0);
geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
}
@@ -3836,8 +3834,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
case TGSI_OPCODE_BREV:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
- geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
- geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, dst0[c], src0);
}
break;
case TGSI_OPCODE_POPC:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index a60881000fe..ccdc2f98ef6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -310,6 +310,14 @@ NVC0LegalizeSSA::handleSET(CmpInstruction *cmp)
cmp->sType = hTy;
}
+void
+NVC0LegalizeSSA::handleBREV(Instruction *i)
+{
+ i->op = OP_EXTBF;
+ i->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ i->setSrc(1, bld.mkImm(0x2000));
+}
+
bool
NVC0LegalizeSSA::visit(Function *fn)
{
@@ -354,6 +362,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64)
handleSET(i->asCmp());
break;
+ case OP_BREV:
+ handleBREV(i);
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index b4c405a9ea5..a4925013ee4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -68,6 +68,7 @@ private:
void handleSET(CmpInstruction *);
void handleTEXLOD(TexInstruction *);
void handleShift(Instruction *);
+ void handleBREV(Instruction *);
protected:
BuildUtil bld;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 5c3d15968cf..d17e605a51e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1534,6 +1534,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->subOp = 0;
break;
}
+ case OP_BREV: {
+ uint32_t res = util_bitreverse(imm0.reg.data.u32);
+ i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
+ i->op = OP_MOV;
+ break;
+ }
case OP_POPCNT: {
// Only deal with 1-arg POPCNT here
if (i->srcExists(1))
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index b85ace15b87..e3bac648761 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -178,6 +178,7 @@ const char *operationStr[OP_LAST + 1] =
"insbf",
"extbf",
"bfind",
+ "brev",
"permt",
"atom",
"bar",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index e801722ec09..77edf41e124 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -51,7 +51,7 @@ const uint8_t Target::operationSrcNr[] =
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
- 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
+ 2, 3, 2, 1, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, PERMT
2, 2, // ATOM, BAR
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
@@ -120,9 +120,9 @@ const OpClass Target::operationClass[] =
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
- // POPCNT, INSBF, EXTBF, BFIND; PERMT
+ // POPCNT, INSBF, EXTBF, BFIND, BREV; PERMT
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
- OPCLASS_BITFIELD,
+ OPCLASS_BITFIELD, OPCLASS_BITFIELD,
// ATOM, BAR
OPCLASS_ATOMIC, OPCLASS_CONTROL,
// VADD, VAVG, VMIN, VMAX