diff options
Diffstat (limited to 'src/compiler/nir/nir_opt_uniform_atomics.c')
-rw-r--r-- | src/compiler/nir/nir_opt_uniform_atomics.c | 245 |
1 files changed, 143 insertions, 102 deletions
diff --git a/src/compiler/nir/nir_opt_uniform_atomics.c b/src/compiler/nir/nir_opt_uniform_atomics.c index 2c64e3198a4..71b3fffc029 100644 --- a/src/compiler/nir/nir_opt_uniform_atomics.c +++ b/src/compiler/nir/nir_opt_uniform_atomics.c @@ -40,69 +40,103 @@ #include "nir/nir_builder.h" static nir_op -parse_atomic_op(nir_intrinsic_op op, unsigned *offset_src, unsigned *data_src) +atomic_op_to_alu(nir_atomic_op op) { switch (op) { - #define OP_NOIMG(intrin, alu) \ - case nir_intrinsic_ssbo_atomic_##intrin: \ - *offset_src = 1; \ - *data_src = 2; \ - return nir_op_##alu; \ - case nir_intrinsic_shared_atomic_##intrin: \ - case nir_intrinsic_global_atomic_##intrin: \ - case nir_intrinsic_deref_atomic_##intrin: \ - *offset_src = 0; \ - *data_src = 1; \ - return nir_op_##alu; - #define OP(intrin, alu) \ - OP_NOIMG(intrin, alu) \ - case nir_intrinsic_image_deref_atomic_##intrin: \ - case nir_intrinsic_image_atomic_##intrin: \ - case nir_intrinsic_bindless_image_atomic_##intrin: \ - *offset_src = 1; \ - *data_src = 3; \ - return nir_op_##alu; - OP(add, iadd) - OP(imin, imin) - OP(umin, umin) - OP(imax, imax) - OP(umax, umax) - OP(and, iand) - OP(or, ior) - OP(xor, ixor) - OP(fadd, fadd) - OP_NOIMG(fmin, fmin) - OP_NOIMG(fmax, fmax) - #undef OP_NOIMG - #undef OP + case nir_atomic_op_iadd: + return nir_op_iadd; + case nir_atomic_op_imin: + return nir_op_imin; + case nir_atomic_op_umin: + return nir_op_umin; + case nir_atomic_op_imax: + return nir_op_imax; + case nir_atomic_op_umax: + return nir_op_umax; + case nir_atomic_op_iand: + return nir_op_iand; + case nir_atomic_op_ior: + return nir_op_ior; + case nir_atomic_op_ixor: + return nir_op_ixor; + case nir_atomic_op_fadd: + return nir_op_fadd; + case nir_atomic_op_fmin: + return nir_op_fmin; + case nir_atomic_op_fmax: + return nir_op_fmax; + + /* We don't handle exchanges or wraps */ + case nir_atomic_op_xchg: + case nir_atomic_op_cmpxchg: + case nir_atomic_op_fcmpxchg: + case nir_atomic_op_inc_wrap: + case nir_atomic_op_dec_wrap: + return nir_num_opcodes; + } + + unreachable("Unknown atomic op"); +} + +static nir_op +parse_atomic_op(nir_intrinsic_instr *intr, unsigned *offset_src, + unsigned *data_src, unsigned *offset2_src) +{ + switch (intr->intrinsic) { + case nir_intrinsic_ssbo_atomic: + *offset_src = 1; + *data_src = 2; + *offset2_src = *offset_src; + return atomic_op_to_alu(nir_intrinsic_atomic_op(intr)); + case nir_intrinsic_shared_atomic: + case nir_intrinsic_global_atomic: + case nir_intrinsic_deref_atomic: + *offset_src = 0; + *data_src = 1; + *offset2_src = *offset_src; + return atomic_op_to_alu(nir_intrinsic_atomic_op(intr)); + case nir_intrinsic_global_atomic_amd: + *offset_src = 0; + *data_src = 1; + *offset2_src = 2; + return atomic_op_to_alu(nir_intrinsic_atomic_op(intr)); + case nir_intrinsic_image_deref_atomic: + case nir_intrinsic_image_atomic: + case nir_intrinsic_bindless_image_atomic: + *offset_src = 1; + *data_src = 3; + *offset2_src = *offset_src; + return atomic_op_to_alu(nir_intrinsic_atomic_op(intr)); + default: return nir_num_opcodes; } } static unsigned -get_dim(nir_ssa_scalar scalar) +get_dim(nir_scalar scalar) { if (!scalar.def->divergent) return 0; - if (scalar.def->parent_instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr); - if (intrin->intrinsic == nir_intrinsic_load_subgroup_invocation) + if (nir_scalar_is_intrinsic(scalar)) { + switch (nir_scalar_intrinsic_op(scalar)) { + case nir_intrinsic_load_subgroup_invocation: return 0x8; - else if (intrin->intrinsic == nir_intrinsic_load_local_invocation_index) - return 0x7; - else if (intrin->intrinsic == nir_intrinsic_load_local_invocation_id) - return 1 << scalar.comp; - else if (intrin->intrinsic == nir_intrinsic_load_global_invocation_index) + case nir_intrinsic_load_global_invocation_index: + case nir_intrinsic_load_local_invocation_index: return 0x7; - else if (intrin->intrinsic == nir_intrinsic_load_global_invocation_id) + case nir_intrinsic_load_global_invocation_id: + case nir_intrinsic_load_local_invocation_id: return 1 << scalar.comp; - } else if (nir_ssa_scalar_is_alu(scalar)) { - if (nir_ssa_scalar_alu_op(scalar) == nir_op_iadd || - nir_ssa_scalar_alu_op(scalar) == nir_op_imul) { - nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0); - nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1); + default: + break; + } + } else if (nir_scalar_is_alu(scalar)) { + if (nir_scalar_alu_op(scalar) == nir_op_iadd || + nir_scalar_alu_op(scalar) == nir_op_imul) { + nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0); + nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1); unsigned src0_dim = get_dim(src0); if (!src0_dim && src0.def->divergent) @@ -112,9 +146,9 @@ get_dim(nir_ssa_scalar scalar) return 0; return src0_dim | src1_dim; - } else if (nir_ssa_scalar_alu_op(scalar) == nir_op_ishl) { - nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0); - nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1); + } else if (nir_scalar_alu_op(scalar) == nir_op_ishl) { + nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0); + nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1); return src1.def->divergent ? 0 : get_dim(src0); } } @@ -126,17 +160,17 @@ get_dim(nir_ssa_scalar scalar) * uniform value. */ static unsigned -match_invocation_comparison(nir_ssa_scalar scalar) +match_invocation_comparison(nir_scalar scalar) { - bool is_alu = nir_ssa_scalar_is_alu(scalar); - if (is_alu && nir_ssa_scalar_alu_op(scalar) == nir_op_iand) { - return match_invocation_comparison(nir_ssa_scalar_chase_alu_src(scalar, 0)) | - match_invocation_comparison(nir_ssa_scalar_chase_alu_src(scalar, 1)); - } else if (is_alu && nir_ssa_scalar_alu_op(scalar) == nir_op_ieq) { - if (!nir_ssa_scalar_chase_alu_src(scalar, 0).def->divergent) - return get_dim(nir_ssa_scalar_chase_alu_src(scalar, 1)); - if (!nir_ssa_scalar_chase_alu_src(scalar, 1).def->divergent) - return get_dim(nir_ssa_scalar_chase_alu_src(scalar, 0)); + bool is_alu = nir_scalar_is_alu(scalar); + if (is_alu && nir_scalar_alu_op(scalar) == nir_op_iand) { + return match_invocation_comparison(nir_scalar_chase_alu_src(scalar, 0)) | + match_invocation_comparison(nir_scalar_chase_alu_src(scalar, 1)); + } else if (is_alu && nir_scalar_alu_op(scalar) == nir_op_ieq) { + if (!nir_scalar_chase_alu_src(scalar, 0).def->divergent) + return get_dim(nir_scalar_chase_alu_src(scalar, 1)); + if (!nir_scalar_chase_alu_src(scalar, 1).def->divergent) + return get_dim(nir_scalar_chase_alu_src(scalar, 0)); } else if (scalar.def->parent_instr->type == nir_instr_type_intrinsic) { nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr); if (intrin->intrinsic == nir_intrinsic_elect) @@ -162,51 +196,59 @@ is_atomic_already_optimized(nir_shader *shader, nir_intrinsic_instr *instr) if (!within_then) continue; - nir_ssa_scalar cond = {nir_cf_node_as_if(cf)->condition.ssa, 0}; + nir_scalar cond = { nir_cf_node_as_if(cf)->condition.ssa, 0 }; dims |= match_invocation_comparison(cond); } } - unsigned dims_needed = 0; - for (unsigned i = 0; i < 3; i++) - dims_needed |= (shader->info.workgroup_size[i] > 1) << i; + if (gl_shader_stage_uses_workgroup(shader->info.stage)) { + unsigned dims_needed = 0; + for (unsigned i = 0; i < 3; i++) + dims_needed |= (shader->info.workgroup_size_variable || + shader->info.workgroup_size[i] > 1) + << i; + if ((dims & dims_needed) == dims_needed) + return true; + } - return (dims & dims_needed) == dims_needed || dims & 0x8; + return dims & 0x8; } /* Perform a reduction and/or exclusive scan. */ static void -reduce_data(nir_builder *b, nir_op op, nir_ssa_def *data, - nir_ssa_def **reduce, nir_ssa_def **scan) +reduce_data(nir_builder *b, nir_op op, nir_def *data, + nir_def **reduce, nir_def **scan) { if (scan) { - *scan = nir_exclusive_scan(b, data, .reduction_op=op); + *scan = nir_exclusive_scan(b, data, .reduction_op = op); if (reduce) { - nir_ssa_def *last_lane = nir_last_invocation(b); - nir_ssa_def *res = nir_build_alu(b, op, *scan, data, NULL, NULL); + nir_def *last_lane = nir_last_invocation(b); + nir_def *res = nir_build_alu(b, op, *scan, data, NULL, NULL); *reduce = nir_read_invocation(b, res, last_lane); } } else { - *reduce = nir_reduce(b, data, .reduction_op=op); + *reduce = nir_reduce(b, data, .reduction_op = op); } } -static nir_ssa_def * +static nir_def * optimize_atomic(nir_builder *b, nir_intrinsic_instr *intrin, bool return_prev) { - unsigned offset_src, data_src; - nir_op op = parse_atomic_op(intrin->intrinsic, &offset_src, &data_src); - nir_ssa_def *data = intrin->src[data_src].ssa; + unsigned offset_src = 0; + unsigned data_src = 0; + unsigned offset2_src = 0; + nir_op op = parse_atomic_op(intrin, &offset_src, &data_src, &offset2_src); + nir_def *data = intrin->src[data_src].ssa; /* Separate uniform reduction and scan is faster than doing a combined scan+reduce */ bool combined_scan_reduce = return_prev && data->divergent; - nir_ssa_def *reduce = NULL, *scan = NULL; + nir_def *reduce = NULL, *scan = NULL; reduce_data(b, op, data, &reduce, combined_scan_reduce ? &scan : NULL); - nir_instr_rewrite_src(&intrin->instr, &intrin->src[data_src], nir_src_for_ssa(reduce)); + nir_src_rewrite(&intrin->src[data_src], reduce); nir_update_instr_divergence(b->shader, &intrin->instr); - nir_ssa_def *cond = nir_elect(b, 1); + nir_def *cond = nir_elect(b, 1); nir_if *nif = nir_push_if(b, cond); @@ -216,10 +258,10 @@ optimize_atomic(nir_builder *b, nir_intrinsic_instr *intrin, bool return_prev) if (return_prev) { nir_push_else(b, nif); - nir_ssa_def *undef = nir_ssa_undef(b, 1, intrin->dest.ssa.bit_size); + nir_def *undef = nir_undef(b, 1, intrin->def.bit_size); nir_pop_if(b, nif); - nir_ssa_def *result = nir_if_phi(b, &intrin->dest.ssa, undef); + nir_def *result = nir_if_phi(b, &intrin->def, undef); result = nir_read_first_invocation(b, result); if (!combined_scan_reduce) @@ -237,23 +279,23 @@ optimize_and_rewrite_atomic(nir_builder *b, nir_intrinsic_instr *intrin) { nir_if *helper_nif = NULL; if (b->shader->info.stage == MESA_SHADER_FRAGMENT) { - nir_ssa_def *helper = nir_is_helper_invocation(b, 1); + nir_def *helper = nir_is_helper_invocation(b, 1); helper_nif = nir_push_if(b, nir_inot(b, helper)); } - ASSERTED bool original_result_divergent = intrin->dest.ssa.divergent; - bool return_prev = !nir_ssa_def_is_unused(&intrin->dest.ssa); + ASSERTED bool original_result_divergent = intrin->def.divergent; + bool return_prev = !nir_def_is_unused(&intrin->def); - nir_ssa_def old_result = intrin->dest.ssa; - list_replace(&intrin->dest.ssa.uses, &old_result.uses); - list_replace(&intrin->dest.ssa.if_uses, &old_result.if_uses); - nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, intrin->dest.ssa.bit_size, NULL); + nir_def old_result = intrin->def; + list_replace(&intrin->def.uses, &old_result.uses); + nir_def_init(&intrin->instr, &intrin->def, 1, + intrin->def.bit_size); - nir_ssa_def *result = optimize_atomic(b, intrin, return_prev); + nir_def *result = optimize_atomic(b, intrin, return_prev); if (helper_nif) { nir_push_else(b, helper_nif); - nir_ssa_def *undef = result ? nir_ssa_undef(b, 1, result->bit_size) : NULL; + nir_def *undef = result ? nir_undef(b, 1, result->bit_size) : NULL; nir_pop_if(b, helper_nif); if (result) result = nir_if_phi(b, result, undef); @@ -261,7 +303,7 @@ optimize_and_rewrite_atomic(nir_builder *b, nir_intrinsic_instr *intrin) if (result) { assert(result->divergent == original_result_divergent); - nir_ssa_def_rewrite_uses(&old_result, result); + nir_def_rewrite_uses(&old_result, result); } } @@ -269,8 +311,7 @@ static bool opt_uniform_atomics(nir_function_impl *impl) { bool progress = false; - nir_builder b; - nir_builder_init(&b, impl); + nir_builder b = nir_builder_create(impl); b.update_divergence = true; nir_foreach_block(block, impl) { @@ -279,12 +320,15 @@ opt_uniform_atomics(nir_function_impl *impl) continue; nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - unsigned offset_src, data_src; - if (parse_atomic_op(intrin->intrinsic, &offset_src, &data_src) == nir_num_opcodes) + unsigned offset_src, data_src, offset2_src; + if (parse_atomic_op(intrin, &offset_src, &data_src, &offset2_src) == + nir_num_opcodes) continue; if (nir_src_is_divergent(intrin->src[offset_src])) continue; + if (nir_src_is_divergent(intrin->src[offset2_src])) + continue; if (is_atomic_already_optimized(b.shader, intrin)) continue; @@ -312,15 +356,12 @@ nir_opt_uniform_atomics(nir_shader *shader) shader->info.workgroup_size[2] == 1) return false; - nir_foreach_function(function, shader) { - if (!function->impl) - continue; - - if (opt_uniform_atomics(function->impl)) { + nir_foreach_function_impl(impl, shader) { + if (opt_uniform_atomics(impl)) { progress = true; - nir_metadata_preserve(function->impl, 0); + nir_metadata_preserve(impl, nir_metadata_none); } else { - nir_metadata_preserve(function->impl, nir_metadata_all); + nir_metadata_preserve(impl, nir_metadata_all); } } |