summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2018-12-07 15:47:18 -0800
committerEric Anholt <eric@anholt.net>2019-01-14 15:40:55 -0800
commit01d913cf90d3c57ba82f555bdf4ccb8ef7f9801e (patch)
tree48640d00a6c7a037cd30e23539f77d3c7312bb56 /src
parent6281f26f064ada36b57d45feb68d8e7d783198c9 (diff)
v3d: Add support for CS workgroup/invocation id intrinsics.
We get a payload for the ivec3 workgroup ID and an int local invocation index, and we use the core lowering to turn them into the global invocation ID and the local invocation ID ivec3s.
Diffstat (limited to 'src')
-rw-r--r--src/broadcom/compiler/nir_to_vir.c54
-rw-r--r--src/broadcom/compiler/v3d_compiler.h8
-rw-r--r--src/broadcom/compiler/vir.c1
-rw-r--r--src/broadcom/compiler/vir_dump.c4
-rw-r--r--src/broadcom/compiler/vir_register_allocate.c1
5 files changed, 67 insertions, 1 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index f10ed5975c1..c1889a7d645 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1899,6 +1899,32 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
*/
break;
+ case nir_intrinsic_load_num_work_groups:
+ for (int i = 0; i < 3; i++) {
+ ntq_store_dest(c, &instr->dest, i,
+ vir_uniform(c, QUNIFORM_NUM_WORK_GROUPS,
+ i));
+ }
+ break;
+
+ case nir_intrinsic_load_local_invocation_index:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_SHR(c, c->cs_payload[1],
+ vir_uniform_ui(c, 32 - c->local_invocation_index_bits)));
+ break;
+
+ case nir_intrinsic_load_work_group_id:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_AND(c, c->cs_payload[0],
+ vir_uniform_ui(c, 0xffff)));
+ ntq_store_dest(c, &instr->dest, 1,
+ vir_SHR(c, c->cs_payload[0],
+ vir_uniform_ui(c, 16)));
+ ntq_store_dest(c, &instr->dest, 2,
+ vir_AND(c, c->cs_payload[1],
+ vir_uniform_ui(c, 0xffff)));
+ break;
+
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
@@ -2255,7 +2281,8 @@ ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl)
static void
nir_to_vir(struct v3d_compile *c)
{
- if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+ switch (c->s->info.stage) {
+ case MESA_SHADER_FRAGMENT:
c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
@@ -2270,6 +2297,30 @@ nir_to_vir(struct v3d_compile *c)
} else if (c->fs_key->is_lines) {
c->line_x = emit_fragment_varying(c, NULL, 0, 0);
}
+ break;
+ case MESA_SHADER_COMPUTE:
+ if (c->s->info.system_values_read &
+ ((1ull << SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
+ (1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
+ c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
+ }
+ if (c->s->info.system_values_read &
+ ((1ull << SYSTEM_VALUE_WORK_GROUP_ID))) {
+ c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
+ }
+
+ /* Set up the division between gl_LocalInvocationIndex and
+ * wg_in_mem in the payload reg.
+ */
+ int wg_size = (c->s->info.cs.local_size[0] *
+ c->s->info.cs.local_size[1] *
+ c->s->info.cs.local_size[2]);
+ c->local_invocation_index_bits =
+ ffs(util_next_power_of_two(MAX2(wg_size, 64))) - 1;
+ assert(c->local_invocation_index_bits <= 8);
+ break;
+ default:
+ break;
}
if (c->s->info.stage == MESA_SHADER_FRAGMENT)
@@ -2298,6 +2349,7 @@ const nir_shader_compiler_options v3d_nir_options = {
.lower_bitfield_extract_to_shifts = true,
.lower_bitfield_reverse = true,
.lower_bit_count = true,
+ .lower_cs_local_id_from_index = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index a5f99d3dae8..c61e0c95156 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -259,6 +259,11 @@ enum quniform_contents {
QUNIFORM_ALPHA_REF,
+ /* Number of workgroups passed to glDispatchCompute in the dimension
+ * selected by the data value.
+ */
+ QUNIFORM_NUM_WORK_GROUPS,
+
/**
* Returns the offset of the scratch buffer for register spilling.
*/
@@ -540,6 +545,9 @@ struct v3d_compile {
/* Fragment shader payload regs. */
struct qreg payload_w, payload_w_centroid, payload_z;
+ struct qreg cs_payload[2];
+ int local_invocation_index_bits;
+
uint8_t vattr_sizes[V3D_MAX_VS_INPUTS];
uint32_t num_vpm_writes;
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 55a02123322..10105fbd861 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -639,6 +639,7 @@ v3d_lower_nir(struct v3d_compile *c)
}
NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
+ NIR_PASS_V(c->s, nir_lower_system_values);
}
static void
diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c
index 028e2b36c4b..0ec3070dedc 100644
--- a/src/broadcom/compiler/vir_dump.c
+++ b/src/broadcom/compiler/vir_dump.c
@@ -108,6 +108,10 @@ vir_dump_uniform(enum quniform_contents contents,
fprintf(stderr, "ssbo_size[%d]", data);
break;
+ case QUNIFORM_NUM_WORK_GROUPS:
+ fprintf(stderr, "num_wg.%c", data < 3 ? "xyz"[data] : '?');
+ break;
+
default:
if (quniform_contents_is_texture_p0(contents)) {
fprintf(stderr, "tex[%d].p0: 0x%08x",
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index accc07a3a33..79ab5acd764 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -482,6 +482,7 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled)
case 0:
case 1:
case 2:
+ case 3:
/* Payload setup instructions: Force allocate
* the dst to the given register (so the MOV
* will disappear).