diff options
author | Jason Ekstrand <jason.ekstrand@intel.com> | 2017-09-29 17:57:32 -0700 |
---|---|---|
committer | Jason Ekstrand <jason.ekstrand@intel.com> | 2017-11-07 10:37:52 -0800 |
commit | 80ddfab2f54d7cd9dd4b93d2fbfa239f061a1f2b (patch) | |
tree | ae7d48e61d34f58d19bc7958b1cb31f3d87a8bed /src/intel/compiler/brw_nir_lower_cs_intrinsics.c | |
parent | 25f7453c9e6dc7c947b936bdac86680c332362bf (diff) |
intel/cs: Rework the way thread local ID is handled
Previously, brw_nir_lower_intrinsics added the param and then emitted a
load_uniform intrinsic to load it directly. This commit switches things
over to use a specific NIR intrinsic for the thread id. The one thing I
don't like about this approach is that we have to copy thread_local_id
over to the new visitor in import_uniforms.
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Diffstat (limited to 'src/intel/compiler/brw_nir_lower_cs_intrinsics.c')
-rw-r--r-- | src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 53 |
1 files changed, 11 insertions, 42 deletions
diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c index d27727624c6..07d2dccd041 100644 --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c @@ -26,47 +26,12 @@ struct lower_intrinsics_state { nir_shader *nir; - struct brw_cs_prog_data *prog_data; nir_function_impl *impl; bool progress; nir_builder builder; - int thread_local_id_index; + unsigned local_workgroup_size; }; -static nir_ssa_def * -read_thread_local_id(struct lower_intrinsics_state *state) -{ - struct brw_cs_prog_data *prog_data = state->prog_data; - nir_builder *b = &state->builder; - nir_shader *nir = state->nir; - const unsigned *sizes = nir->info.cs.local_size; - const unsigned group_size = sizes[0] * sizes[1] * sizes[2]; - - /* Some programs have local_size dimensions so small that the thread local - * ID will always be 0. - */ - if (group_size <= 8) - return nir_imm_int(b, 0); - - if (state->thread_local_id_index == -1) { - state->thread_local_id_index = prog_data->base.nr_params; - uint32_t *param = brw_stage_prog_data_add_params(&prog_data->base, 1); - *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID; - nir->num_uniforms += 4; - } - unsigned id_index = state->thread_local_id_index; - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); - load->num_components = 1; - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); - nir_intrinsic_set_base(load, id_index * sizeof(uint32_t)); - nir_intrinsic_set_range(load, sizeof(uint32_t)); - nir_builder_instr_insert(b, &load->instr); - return &load->dest.ssa; -} - static bool lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state, nir_block *block) @@ -91,7 +56,12 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state, * gl_LocalInvocationIndex = * cs_thread_local_id + subgroup_invocation; */ - nir_ssa_def *thread_local_id = read_thread_local_id(state); + nir_ssa_def *thread_local_id; + if (state->local_workgroup_size <= 8) + thread_local_id = nir_imm_int(b, 0); + else + thread_local_id = nir_load_intel_thread_local_id(b); + nir_ssa_def *channel = nir_load_subgroup_invocation(b); sysval = nir_iadd(b, channel, thread_local_id); break; @@ -157,8 +127,7 @@ lower_cs_intrinsics_convert_impl(struct lower_intrinsics_state *state) } bool -brw_nir_lower_cs_intrinsics(nir_shader *nir, - struct brw_cs_prog_data *prog_data) +brw_nir_lower_cs_intrinsics(nir_shader *nir) { assert(nir->info.stage == MESA_SHADER_COMPUTE); @@ -166,9 +135,9 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir, struct lower_intrinsics_state state; memset(&state, 0, sizeof(state)); state.nir = nir; - state.prog_data = prog_data; - - state.thread_local_id_index = -1; + state.local_workgroup_size = nir->info.cs.local_size[0] * + nir->info.cs.local_size[1] * + nir->info.cs.local_size[2]; do { state.progress = false; |