author     Timur Kristóf <timur.kristof@gmail.com>   2021-04-09 16:56:57 +0200
committer  Marge Bot <eric+marge@anholt.net>         2021-05-12 13:47:04 +0000
commit     3d589b8b464828dc19c4e7d73f5cc078c24915e1 (patch)
tree       a6bebccef314996eebafeb9d81c8d845776e66a3
parent     89a76ff78679aee99e6caa34a71444b3b9bb5b57 (diff)
ac: Add new NIR pass to lower NGG VS/TES.
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10740>
-rw-r--r--  src/amd/common/ac_nir.h             |  17
-rw-r--r--  src/amd/common/ac_nir_lower_ngg.c   | 272
-rw-r--r--  src/amd/common/meson.build          |   1
3 files changed, 290 insertions, 0 deletions
diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h
index b3d4c6b56f7..59e0d307f8a 100644
--- a/src/amd/common/ac_nir.h
+++ b/src/amd/common/ac_nir.h
@@ -86,6 +86,23 @@ bool
 ac_nir_lower_indirect_derefs(nir_shader *shader,
                              enum chip_class chip_class);
 
+typedef struct
+{
+   bool can_cull;
+   bool passthrough;
+} ac_nir_ngg_config;
+
+ac_nir_ngg_config
+ac_nir_lower_ngg_nogs(nir_shader *shader,
+                      unsigned max_num_es_vertices,
+                      unsigned num_vertices_per_primitive,
+                      unsigned max_workgroup_size,
+                      unsigned wave_size,
+                      bool consider_culling,
+                      bool consider_passthrough,
+                      bool export_prim_id,
+                      bool provoking_vtx_last);
+
#ifdef __cplusplus
}
#endif
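
For reference, a driver backend would invoke the entry point declared above roughly as sketched below. This call site is not part of the patch; the surrounding function name and all concrete values (triangle lists, wave64, a 256-lane NGG workgroup) are hypothetical examples chosen only to illustrate the parameters.

/* Illustrative only -- not part of this patch. Shows how a driver might
 * call the new pass for an NGG vertex shader; all values are hypothetical. */
#include "ac_nir.h"

static void
example_lower_ngg_vs(nir_shader *nir)
{
   ac_nir_ngg_config cfg =
      ac_nir_lower_ngg_nogs(nir,
                            256,   /* max_num_es_vertices */
                            3,     /* num_vertices_per_primitive (triangles) */
                            256,   /* max_workgroup_size */
                            64,    /* wave_size */
                            false, /* consider_culling (not implemented yet) */
                            true,  /* consider_passthrough */
                            false, /* export_prim_id */
                            false  /* provoking_vtx_last */);

   /* The returned config reports what the lowered shader actually uses,
    * e.g. whether NGG passthrough mode can be enabled by the backend. */
   if (cfg.passthrough) {
      /* program the pipeline for NGG passthrough mode */
   }
}
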
diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c
new file mode 100644
index 00000000000..14c60408e4d
--- /dev/null
+++ b/src/amd/common/ac_nir_lower_ngg.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright © 2021 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "ac_nir.h"
+#include "nir_builder.h"
+#include "u_math.h"
+
+typedef struct
+{
+   nir_variable *position_value_var;
+   nir_variable *prim_exp_arg_var;
+
+   bool passthrough;
+   bool export_prim_id;
+   bool early_prim_export;
+   unsigned max_num_waves;
+   unsigned num_vertices_per_primitives;
+   unsigned provoking_vtx_idx;
+   unsigned max_es_num_vertices;
+   unsigned total_lds_bytes;
+} lower_ngg_nogs_state;
+
+static nir_ssa_def *
+pervertex_lds_addr(nir_builder *b, nir_ssa_def *vertex_idx, unsigned per_vtx_bytes)
+{
+   return nir_imul_imm(b, vertex_idx, per_vtx_bytes);
+}
+
+static nir_ssa_def *
+emit_pack_ngg_prim_exp_arg(nir_builder *b, unsigned num_vertices_per_primitives,
+                           nir_ssa_def *vertex_indices[3], nir_ssa_def *is_null_prim)
+{
+   nir_ssa_def *arg = vertex_indices[0];
+
+   for (unsigned i = 0; i < num_vertices_per_primitives; ++i) {
+      assert(vertex_indices[i]);
+
+      if (i)
+         arg = nir_ior(b, arg, nir_ishl(b, vertex_indices[i], nir_imm_int(b, 10u * i)));
+
+      if (b->shader->info.stage == MESA_SHADER_VERTEX) {
+         nir_ssa_def *edgeflag = nir_build_load_initial_edgeflag_amd(b, 32, nir_imm_int(b, i));
+         arg = nir_ior(b, arg, nir_ishl(b, edgeflag, nir_imm_int(b, 10u * i + 9u)));
+      }
+   }
+
+   if (is_null_prim) {
+      if (is_null_prim->bit_size == 1)
+         is_null_prim = nir_b2i32(b, is_null_prim);
+      assert(is_null_prim->bit_size == 32);
+      arg = nir_ior(b, arg, nir_ishl(b, is_null_prim, nir_imm_int(b, 31u)));
+   }
+
+   return arg;
+}
+
+static nir_ssa_def *
+ngg_input_primitive_vertex_index(nir_builder *b, unsigned vertex)
+{
+   /* TODO: This is RADV specific. We'll need to refactor RADV and/or RadeonSI to match. */
+   return nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = vertex / 2u * 2u),
+                   nir_imm_int(b, (vertex % 2u) * 16u), nir_imm_int(b, 16u));
+}
+
+static nir_ssa_def *
+emit_ngg_nogs_prim_exp_arg(nir_builder *b, lower_ngg_nogs_state *st)
+{
+   if (st->passthrough) {
+      assert(!st->export_prim_id || b->shader->info.stage != MESA_SHADER_VERTEX);
+      return nir_build_load_packed_passthrough_primitive_amd(b);
+   } else {
+      nir_ssa_def *vtx_idx[3] = {0};
+
+      vtx_idx[0] = ngg_input_primitive_vertex_index(b, 0);
+      vtx_idx[1] = st->num_vertices_per_primitives >= 2
+                   ? ngg_input_primitive_vertex_index(b, 1)
+                   : nir_imm_zero(b, 1, 32);
+      vtx_idx[2] = st->num_vertices_per_primitives >= 3
+                   ? ngg_input_primitive_vertex_index(b, 2)
+                   : nir_imm_zero(b, 1, 32);
+
+      return emit_pack_ngg_prim_exp_arg(b, st->num_vertices_per_primitives, vtx_idx, NULL);
+   }
+}
+
+static void
+emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def *arg)
+{
+   nir_if *if_gs_thread = nir_push_if(b, nir_build_has_input_primitive_amd(b));
+   {
+      if (!arg)
+         arg = emit_ngg_nogs_prim_exp_arg(b, st);
+
+      if (st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX) {
+         /* Copy Primitive IDs from GS threads to the LDS address corresponding to the ES thread of the provoking vertex. */
+         nir_ssa_def *prim_id = nir_build_load_primitive_id(b);
+         nir_ssa_def *provoking_vtx_idx = ngg_input_primitive_vertex_index(b, st->provoking_vtx_idx);
+         nir_ssa_def *addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u);
+
+         nir_build_store_shared(b, prim_id, addr, .write_mask = 1u, .align_mul = 4u);
+      }
+
+      nir_build_export_primitive_amd(b, arg);
+   }
+   nir_pop_if(b, if_gs_thread);
+}
+
+static void
+emit_store_ngg_nogs_es_primitive_id(nir_builder *b)
+{
+   nir_ssa_def *prim_id = NULL;
+
+   if (b->shader->info.stage == MESA_SHADER_VERTEX) {
+      /* Workgroup barrier - wait for GS threads to store primitive ID in LDS. */
+      nir_scoped_barrier(b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_WORKGROUP,
+                         .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
+
+      /* LDS address where the primitive ID is stored */
+      nir_ssa_def *thread_id_in_threadgroup = nir_build_load_local_invocation_index(b);
+      nir_ssa_def *addr = pervertex_lds_addr(b, thread_id_in_threadgroup, 4u);
+
+      /* Load primitive ID from LDS */
+      prim_id = nir_build_load_shared(b, 1, 32, addr, .align_mul = 4u);
+   } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
+      /* Just use tess eval primitive ID, which is the same as the patch ID. */
+      prim_id = nir_build_load_primitive_id(b);
+   }
+
+   nir_io_semantics io_sem = {
+      .location = VARYING_SLOT_PRIMITIVE_ID,
+      .num_slots = 1,
+   };
+
+   nir_build_store_output(b, prim_id, nir_imm_zero(b, 1, 32),
+                          .base = io_sem.location,
+                          .write_mask = 1u, .src_type = nir_type_uint32, .io_semantics = io_sem);
+}
+
+ac_nir_ngg_config
+ac_nir_lower_ngg_nogs(nir_shader *shader,
+                      unsigned max_num_es_vertices,
+                      unsigned num_vertices_per_primitives,
+                      unsigned max_workgroup_size,
+                      unsigned wave_size,
+                      bool consider_culling,
+                      bool consider_passthrough,
+                      bool export_prim_id,
+                      bool provoking_vtx_last)
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   assert(impl);
+   assert(max_num_es_vertices && max_workgroup_size && wave_size);
+
+   bool can_cull = false; /* TODO */
+   bool passthrough = consider_passthrough && !can_cull &&
+                      !(shader->info.stage == MESA_SHADER_VERTEX && export_prim_id);
+
+   nir_variable *position_value_var = nir_local_variable_create(impl, glsl_vec4_type(), "position_value");
+   nir_variable *prim_exp_arg_var = nir_local_variable_create(impl, glsl_uint_type(), "prim_exp_arg");
+
+   lower_ngg_nogs_state state = {
+      .passthrough = passthrough,
+      .export_prim_id = export_prim_id,
+      .early_prim_export = exec_list_is_singular(&impl->body),
+      .num_vertices_per_primitives = num_vertices_per_primitives,
+      .provoking_vtx_idx = provoking_vtx_last ? (num_vertices_per_primitives - 1) : 0,
+      .position_value_var = position_value_var,
+      .prim_exp_arg_var = prim_exp_arg_var,
+      .max_num_waves = DIV_ROUND_UP(max_workgroup_size, wave_size),
+      .max_es_num_vertices = max_num_es_vertices,
+   };
+
+   /* We need LDS space when VS needs to export the primitive ID. */
+   if (shader->info.stage == MESA_SHADER_VERTEX && export_prim_id)
+      state.total_lds_bytes = max_num_es_vertices * 4u;
+
+   nir_cf_list extracted;
+   nir_cf_extract(&extracted, nir_before_cf_list(&impl->body), nir_after_cf_list(&impl->body));
+
+   nir_builder builder;
+   nir_builder *b = &builder; /* This is to avoid the & */
+   nir_builder_init(b, impl);
+   b->cursor = nir_before_cf_list(&impl->body);
+
+   if (!can_cull) {
+      /* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */
+      nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_int(b, 0)));
+      {
+         nir_ssa_def *vtx_cnt = nir_build_load_workgroup_num_input_vertices_amd(b);
+         nir_ssa_def *prim_cnt = nir_build_load_workgroup_num_input_primitives_amd(b);
+         nir_build_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt);
+      }
+      nir_pop_if(b, if_wave_0);
+
+      /* Take care of early primitive export, otherwise just pack the primitive export argument */
+      if (state.early_prim_export)
+         emit_ngg_nogs_prim_export(b, &state, NULL);
+      else
+         nir_store_var(b, prim_exp_arg_var, emit_ngg_nogs_prim_exp_arg(b, &state), 0x1u);
+   } else {
+      abort(); /* TODO */
+   }
+
+   nir_if *if_es_thread = nir_push_if(b, nir_build_has_input_vertex_amd(b));
+   {
+      if (can_cull) {
+         nir_ssa_def *pos_val = nir_load_var(b, state.position_value_var);
+         nir_io_semantics io_sem = { .location = VARYING_SLOT_POS, .num_slots = 1 };
+         nir_build_store_output(b, pos_val, nir_imm_int(b, 0), .base = VARYING_SLOT_POS, .component = 0, .io_semantics = io_sem, .write_mask = 0xfu);
+      }
+
+      /* Run the actual shader */
+      nir_cf_reinsert(&extracted, b->cursor);
+      b->cursor = nir_after_cf_list(&if_es_thread->then_list);
+
+      /* Export all vertex attributes (except primitive ID) */
+      nir_build_export_vertex_amd(b);
+
+      /* Export primitive ID (in case of early primitive export or TES) */
+      if (state.export_prim_id && (state.early_prim_export || shader->info.stage != MESA_SHADER_VERTEX))
+         emit_store_ngg_nogs_es_primitive_id(b);
+   }
+   nir_pop_if(b, if_es_thread);
+
+   /* Take care of late primitive export */
+   if (!state.early_prim_export) {
+      emit_ngg_nogs_prim_export(b, &state, nir_load_var(b, prim_exp_arg_var));
+      if (state.export_prim_id && shader->info.stage == MESA_SHADER_VERTEX) {
+         if_es_thread = nir_push_if(b, nir_build_has_input_vertex_amd(b));
+         emit_store_ngg_nogs_es_primitive_id(b);
+         nir_pop_if(b, if_es_thread);
+      }
+   }
+
+   nir_metadata_preserve(impl, nir_metadata_none);
+   nir_validate_shader(shader, "after emitting NGG VS/TES");
+
+   /* Cleanup */
+   nir_lower_vars_to_ssa(shader);
+   nir_remove_dead_variables(shader, nir_var_function_temp, NULL);
+   nir_opt_undef(shader);
+
+   shader->info.shared_size = state.total_lds_bytes;
+
+   ac_nir_ngg_config ret = {
+      .can_cull = can_cull,
+      .passthrough = passthrough,
+   };
+
+   return ret;
+}
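
For readers unfamiliar with the NGG primitive export encoding, the value that emit_pack_ngg_prim_exp_arg() assembles with NIR instructions above places vertex i's index at bits [10*i+8 : 10*i], its initial edge flag (vertex shaders only) at bit 10*i+9, and a null-primitive flag at bit 31; in the non-passthrough path the indices themselves come from ngg_input_primitive_vertex_index(), which extracts 16-bit fields from the gs_vertex_offset inputs. Below is a minimal CPU-side sketch mirroring that bit layout for illustration; the helper name and its parameters are hypothetical and not part of the patch.

/* Illustrative only -- a plain C mirror of the bit layout built in NIR by
 * emit_pack_ngg_prim_exp_arg() above. */
#include <stdbool.h>
#include <stdint.h>

static uint32_t
example_pack_prim_exp_arg(unsigned num_vertices, const uint32_t vtx_idx[3],
                          const bool edgeflag[3], bool is_null_prim)
{
   uint32_t arg = 0;

   for (unsigned i = 0; i < num_vertices; ++i) {
      arg |= vtx_idx[i] << (10u * i);   /* vertex index at bits [10*i+8 : 10*i] */
      if (edgeflag[i])
         arg |= 1u << (10u * i + 9u);   /* initial edge flag (VS only) */
   }

   if (is_null_prim)
      arg |= 1u << 31u;                 /* null primitive flag */

   return arg;
}
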
diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build
index fc8d0fe7e49..e4a2b3368a3 100644
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@@ -91,6 +91,7 @@ amd_common_files = files(
   'ac_nir.h',
   'ac_nir_lower_esgs_io_to_mem.c',
   'ac_nir_lower_tess_io_to_mem.c',
+  'ac_nir_lower_ngg.c',
   'amd_family.c',
)