summary refs log tree commit diff
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2018-12-09 22:03:42 -0800
committer Eric Anholt <eric@anholt.net> 2019-01-14 15:40:55 -0800
commit f72820c851858819b6abf28bd75494f3c4bb7057 (patch)
tree 0d74e3d53efa60d07cb0b0ce695472029d35fdac
parent 9b45b06d7c685c4ef23a25c086870cee05ead1b5 (diff)
v3d: Add support for CS barrier() intrinsics.
-rw-r--r-- src/broadcom/compiler/nir_to_vir.c 50
-rw-r--r-- src/broadcom/compiler/qpu_schedule.c 10
-rw-r--r-- src/broadcom/compiler/v3d_compiler.h 1
3 files changed, 61 insertions, 0 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 3c24d5dfd9c..a5e75f650e8 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -66,6 +66,23 @@
#define GENERAL_TMU_WRITE_OP_ATOMIC_XOR (10 << 3)
#define GENERAL_TMU_WRITE_OP_WRITE (15 << 3)
+#define V3D_TSY_SET_QUORUM 0
+#define V3D_TSY_INC_WAITERS 1
+#define V3D_TSY_DEC_WAITERS 2
+#define V3D_TSY_INC_QUORUM 3
+#define V3D_TSY_DEC_QUORUM 4
+#define V3D_TSY_FREE_ALL 5
+#define V3D_TSY_RELEASE 6
+#define V3D_TSY_ACQUIRE 7
+#define V3D_TSY_WAIT 8
+#define V3D_TSY_WAIT_INC 9
+#define V3D_TSY_WAIT_CHECK 10
+#define V3D_TSY_WAIT_INC_CHECK 11
+#define V3D_TSY_WAIT_CV 12
+#define V3D_TSY_INC_SEMAPHORE 13
+#define V3D_TSY_DEC_SEMAPHORE 14
+#define V3D_TSY_SET_QUORUM_FREE_ALL 15
+
static void
ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
@@ -1937,6 +1954,33 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
*/
break;
+ case nir_intrinsic_barrier:
+ /* Emit a TSY op to get all invocations in the workgroup
+ * (actually supergroup) to block until the last invocation
+ * reaches the TSY op.
+ */
+ if (c->devinfo->ver >= 42) {
+ vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC,
+ V3D_QPU_WADDR_SYNCB));
+ } else {
+ struct qinst *sync =
+ vir_BARRIERID_dest(c,
+ vir_reg(QFILE_MAGIC,
+ V3D_QPU_WADDR_SYNCU));
+ sync->src[vir_get_implicit_uniform_src(sync)] =
+ vir_uniform_ui(c,
+ 0xffffff00 |
+ V3D_TSY_WAIT_INC_CHECK);
+
+ }
+
+ /* The blocking of a TSY op only happens at the next thread
+ * switch. No texturing may be outstanding at the time of a
+ * TSY blocking operation.
+ */
+ vir_emit_thrsw(c);
+ break;
+
case nir_intrinsic_load_num_work_groups:
for (int i = 0; i < 3; i++) {
ntq_store_dest(c, &instr->dest, i,
@@ -2337,6 +2381,12 @@ nir_to_vir(struct v3d_compile *c)
}
break;
case MESA_SHADER_COMPUTE:
+ /* Set up the TSO for barriers, assuming we do some. */
+ if (c->devinfo->ver < 42) {
+ vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC,
+ V3D_QPU_WADDR_SYNC));
+ }
+
if (c->s->info.system_values_read &
((1ull << SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
(1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index be794a88c14..0f8001ff52d 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -236,6 +236,16 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
add_write_dep(state, &state->last_tlb, n);
break;
+ case V3D_QPU_WADDR_SYNC:
+ case V3D_QPU_WADDR_SYNCB:
+ case V3D_QPU_WADDR_SYNCU:
+ /* For CS barrier(): Sync against any other memory
+ * accesses. There doesn't appear to be any need for
+ * barriers to affect ALU operations.
+ */
+ add_write_dep(state, &state->last_tmu_write, n);
+ break;
+
case V3D_QPU_WADDR_NOP:
break;
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 8cf6c5605f2..de56d7e4f10 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -1003,6 +1003,7 @@ VIR_A_ALU0(FYCD)
VIR_A_ALU0(YCD)
VIR_A_ALU0(MSF)
VIR_A_ALU0(REVF)
+VIR_A_ALU0(BARRIERID)
VIR_A_NODST_1(VPMSETUP)
VIR_A_NODST_0(VPMWT)
VIR_A_ALU2(FCMP)