diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c | 1661 |
1 files changed, 1100 insertions, 561 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index dde6831be3b..a740d13d007 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -30,10 +30,13 @@ #include "lp_bld_gather.h" #include "lp_bld_const.h" #include "lp_bld_struct.h" +#include "lp_bld_jit_types.h" #include "lp_bld_arit.h" #include "lp_bld_bitarit.h" #include "lp_bld_coro.h" #include "lp_bld_printf.h" +#include "lp_bld_intr.h" +#include "util/u_cpu_detect.h" #include "util/u_math.h" static int bit_size_to_shift_size(int bit_size) @@ -70,6 +73,77 @@ mask_vec(struct lp_build_nir_context *bld_base) exec_mask->exec_mask, ""); } +static bool +invocation_0_must_be_active(struct lp_build_nir_context *bld_base) +{ + struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base; + + /* Fragment shaders may dispatch with invocation 0 inactive. All other + * stages have invocation 0 active at the top. (See + * lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c) + */ + if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) + return false; + + /* If we're in some control flow right now, then invocation 0 may be + * disabled. + */ + if (bld->exec_mask.has_mask) + return false; + + return true; +} + +/** + * Returns a scalar value of the first active invocation in the exec_mask. + * + * Note that gallivm doesn't generally jump when exec_mask is 0 (such as if/else + * branches thare are all false, or portions of a loop after a break/continue + * has ended the last invocation that had been active in the loop). In that + * case, we return a 0 value so that unconditional LLVMBuildExtractElement of + * the first_active_invocation (such as in memory loads, texture unit index + * lookups, etc) will use a valid index + */ +static LLVMValueRef first_active_invocation(struct lp_build_nir_context *bld_base) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + + if (invocation_0_must_be_active(bld_base)) + return lp_build_const_int32(gallivm, 0); + + LLVMValueRef exec_mask = mask_vec(bld_base); + + LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "exec_bitvec"); + /* Turn it from N x i1 to iN, then extend it up to i32 so we can use a single + * cttz intrinsic -- I assume the compiler will drop the extend if there are + * smaller instructions available, since we have is_zero_poison. + */ + bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, uint_bld->type.length), "exec_bitmask"); + bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, ""); + + LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active"); + + LLVMValueRef first_active = lp_build_intrinsic_binary(builder, "llvm.cttz.i32", bld_base->int_bld.elem_type, bitmask, + LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), false, false)); + + return LLVMBuildSelect(builder, any_active, first_active, lp_build_const_int32(gallivm, 0), "first_active_or_0"); +} + +static LLVMValueRef +lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size, bool is_float) +{ + if (bit_size == 64) + return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0); + else if (bit_size == 16) + return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0); + else if (bit_size == 8) + return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0); + else + return is_float ? lp_build_const_float(gallivm, 0) : lp_build_const_int32(gallivm, 0); +} + static LLVMValueRef emit_fetch_64bit( struct lp_build_nir_context * bld_base, @@ -163,7 +237,7 @@ get_soa_array_offsets(struct lp_build_context *uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length); LLVMValueRef index_vec; - /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ + /* index_vec = (indirect_index * num_components + chan_index) * length + offsets */ index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components)); index_vec = lp_build_add(uint_bld, index_vec, chan_vec); index_vec = lp_build_mul(uint_bld, index_vec, length_vec); @@ -186,6 +260,7 @@ get_soa_array_offsets(struct lp_build_context *uint_bld, static LLVMValueRef build_gather(struct lp_build_nir_context *bld_base, struct lp_build_context *bld, + LLVMTypeRef base_type, LLVMValueRef base_ptr, LLVMValueRef indexes, LLVMValueRef overflow_mask, @@ -246,9 +321,9 @@ build_gather(struct lp_build_nir_context *bld_base, index = LLVMBuildExtractElement(builder, indexes, si, ""); } - scalar_ptr = LLVMBuildGEP(builder, base_ptr, - &index, 1, "gather_ptr"); - scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + + scalar_ptr = LLVMBuildGEP2(builder, base_type, base_ptr, &index, 1, "gather_ptr"); + scalar = LLVMBuildLoad2(builder, base_type, scalar_ptr, ""); res = LLVMBuildInsertElement(builder, res, scalar, di, ""); } @@ -288,8 +363,8 @@ emit_mask_scatter(struct lp_build_nir_soa_context *bld, for (i = 0; i < bld->bld_base.base.type.length; i++) { LLVMValueRef ii = lp_build_const_int32(gallivm, i); LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); - LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); + LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(val), base_ptr, &index, 1, "scatter_ptr"); LLVMValueRef scalar_pred = pred ? LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; @@ -299,7 +374,7 @@ emit_mask_scatter(struct lp_build_nir_soa_context *bld, if (scalar_pred) { LLVMValueRef real_val, dst_val; - dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); + dst_val = LLVMBuildLoad2(builder, LLVMTypeOf(val), scalar_ptr, ""); scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), ""); real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, ""); LLVMBuildStore(builder, real_val, scalar_ptr); @@ -429,30 +504,30 @@ static void emit_load_var(struct lp_build_nir_context *bld_base, LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc)); LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld, attrib_index_val, 4, idx, - TRUE); + true); LLVMValueRef index_vec2 = NULL; - LLVMTypeRef fptr_type; - LLVMValueRef inputs_array; - fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); - inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, ""); + LLVMTypeRef scalar_type = LLVMFloatTypeInContext(gallivm->context); + LLVMValueRef inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, LLVMPointerType(scalar_type, 0), ""); if (bit_size == 64) index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, - indir_index, 4, idx + 1, TRUE); + indir_index, 4, idx + 1, true); /* Gather values from the input register array */ - result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2); + result[i] = build_gather(bld_base, &bld_base->base, scalar_type, inputs_array, index_vec, NULL, index_vec2); } else { if (bld->indirects & nir_var_shader_in) { LLVMValueRef lindex = lp_build_const_int32(gallivm, comp_loc * 4 + idx); - LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder, - bld->inputs_array, lindex); + LLVMValueRef input_ptr = lp_build_pointer_get2(gallivm->builder, + bld->bld_base.base.vec_type, + bld->inputs_array, lindex); if (bit_size == 64) { LLVMValueRef lindex2 = lp_build_const_int32(gallivm, comp_loc * 4 + (idx + 1)); - LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder, - bld->inputs_array, lindex2); + LLVMValueRef input_ptr2 = lp_build_pointer_get2(gallivm->builder, + bld->bld_base.base.vec_type, + bld->inputs_array, lindex2); result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2); } else { result[i] = input_ptr; @@ -473,7 +548,7 @@ static void emit_load_var(struct lp_build_nir_context *bld_base, break; case nir_var_shader_out: if (bld->fs_iface && bld->fs_iface->fb_fetch) { - bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result); + bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result); return; } for (unsigned i = 0; i < num_components; i++) { @@ -570,34 +645,131 @@ static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base, attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location)); } else attrib_index_val = lp_build_const_int32(gallivm, const_index + location); + LLVMValueRef exec_mask = mask_vec(bld_base); + if (bit_size == 64) { + LLVMValueRef split_vals[2]; + LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1); + emit_store_64bit_split(bld_base, chan_val, split_vals); + if (bld->mesh_iface) { + bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index ? true : false, + attrib_index_val, + false, swizzle_index_val, + split_vals[0], exec_mask); + bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index ? true : false, + attrib_index_val, + false, swizzle_index_val2, + split_vals[1], exec_mask); + } else { + bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index ? true : false, + attrib_index_val, + false, swizzle_index_val, + split_vals[0], exec_mask); + bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index ? true : false, + attrib_index_val, + false, swizzle_index_val2, + split_vals[1], exec_mask); + } + } else { + chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, ""); + if (bld->mesh_iface) { + bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index && !is_compact ? true : false, + attrib_index_val, + indir_index && is_compact ? true : false, + swizzle_index_val, + chan_val, exec_mask); + } else { + bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index && !is_compact ? true : false, + attrib_index_val, + indir_index && is_compact ? true : false, + swizzle_index_val, + chan_val, exec_mask); + } + } +} + +static void emit_store_mesh_chan(struct lp_build_nir_context *bld_base, + bool is_compact, + unsigned bit_size, + unsigned location, + unsigned const_index, + LLVMValueRef indir_vertex_index, + LLVMValueRef indir_index, + unsigned comp, + unsigned chan, + LLVMValueRef chan_val) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + unsigned swizzle = chan; + if (bit_size == 64) { + swizzle += const_index; + swizzle *= 2; + swizzle += comp; + if (swizzle >= 4) { + swizzle -= 4; + location++; + } + } else + swizzle += comp; + LLVMValueRef attrib_index_val; + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle); + + if (indir_index) { + if (is_compact) { + swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle)); + attrib_index_val = lp_build_const_int32(gallivm, location); + } else + attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location)); + } else + attrib_index_val = lp_build_const_int32(gallivm, location + const_index); + LLVMValueRef exec_mask = mask_vec(bld_base); if (bit_size == 64) { LLVMValueRef split_vals[2]; LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1); emit_store_64bit_split(bld_base, chan_val, split_vals); - bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0, - indir_vertex_index ? true : false, - indir_vertex_index, - indir_index ? true : false, - attrib_index_val, - false, swizzle_index_val, - split_vals[0], mask_vec(bld_base)); - bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0, - indir_vertex_index ? true : false, - indir_vertex_index, - indir_index ? true : false, - attrib_index_val, - false, swizzle_index_val2, - split_vals[1], mask_vec(bld_base)); + bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index ? true : false, + attrib_index_val, + false, swizzle_index_val, + split_vals[0], exec_mask); + bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index ? true : false, + attrib_index_val, + false, swizzle_index_val2, + split_vals[1], exec_mask); } else { chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, ""); - bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0, - indir_vertex_index ? true : false, - indir_vertex_index, - indir_index && !is_compact ? true : false, - attrib_index_val, - indir_index && is_compact ? true : false, - swizzle_index_val, - chan_val, mask_vec(bld_base)); + bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index && !is_compact ? true : false, + attrib_index_val, + indir_index && is_compact ? true : false, + swizzle_index_val, + chan_val, exec_mask); } } @@ -634,7 +806,9 @@ static void emit_store_var(struct lp_build_nir_context *bld_base, for (unsigned chan = 0; chan < num_components; chan++) { if (writemask & (1u << chan)) { LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, ""); - if (bld->tcs_iface) { + if (bld->mesh_iface) { + emit_store_mesh_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val); + } else if (bld->tcs_iface) { emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val); } else emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val); @@ -647,34 +821,65 @@ static void emit_store_var(struct lp_build_nir_context *bld_base, } } +/** + * Returns the address of the given constant array index and channel in a + * nir register. + */ +static LLVMValueRef reg_chan_pointer(struct lp_build_nir_context *bld_base, + struct lp_build_context *reg_bld, + const nir_intrinsic_instr *decl, + LLVMValueRef reg_storage, + int array_index, int chan) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + int nc = nir_intrinsic_num_components(decl); + int num_array_elems = nir_intrinsic_num_array_elems(decl); + + LLVMTypeRef chan_type = reg_bld->vec_type; + if (nc > 1) + chan_type = LLVMArrayType(chan_type, nc); + + if (num_array_elems > 0) { + LLVMTypeRef array_type = LLVMArrayType(chan_type, num_array_elems); + reg_storage = lp_build_array_get_ptr2(gallivm, array_type, reg_storage, + lp_build_const_int32(gallivm, array_index)); + } + if (nc > 1) { + reg_storage = lp_build_array_get_ptr2(gallivm, chan_type, reg_storage, + lp_build_const_int32(gallivm, chan)); + } + + return reg_storage; +} + static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base, struct lp_build_context *reg_bld, - const nir_reg_src *reg, + const nir_intrinsic_instr *decl, + unsigned base, LLVMValueRef indir_src, LLVMValueRef reg_storage) { struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; - int nc = reg->reg->num_components; + int nc = nir_intrinsic_num_components(decl); + int num_array_elems = nir_intrinsic_num_array_elems(decl); LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL }; struct lp_build_context *uint_bld = &bld_base->uint_bld; - if (reg->reg->num_array_elems) { - LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset); - if (reg->indirect) { - LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1); - indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, ""); - indirect_val = lp_build_min(uint_bld, indirect_val, max_index); - } + if (indir_src != NULL) { + LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, base); + LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, num_array_elems - 1); + indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, ""); + indirect_val = lp_build_min(uint_bld, indirect_val, max_index); reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), ""); for (unsigned i = 0; i < nc; i++) { - LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE); - vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL); + LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, true); + vals[i] = build_gather(bld_base, reg_bld, reg_bld->elem_type, reg_storage, indirect_offset, NULL, NULL); } } else { for (unsigned i = 0; i < nc; i++) { - LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage, - lp_build_const_int32(gallivm, i)); - vals[i] = LLVMBuildLoad(builder, this_storage, ""); + vals[i] = LLVMBuildLoad2(builder, reg_bld->vec_type, + reg_chan_pointer(bld_base, reg_bld, decl, reg_storage, + base, i), ""); } } return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc); @@ -682,8 +887,9 @@ static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base, static void emit_store_reg(struct lp_build_nir_context *bld_base, struct lp_build_context *reg_bld, - const nir_reg_dest *reg, + const nir_intrinsic_instr *decl, unsigned writemask, + unsigned base, LLVMValueRef indir_src, LLVMValueRef reg_storage, LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS]) @@ -692,19 +898,18 @@ static void emit_store_reg(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *uint_bld = &bld_base->uint_bld; - int nc = reg->reg->num_components; - if (reg->reg->num_array_elems > 0) { - LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset); - if (reg->indirect) { - LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1); - indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, ""); - indirect_val = lp_build_min(uint_bld, indirect_val, max_index); - } + int nc = nir_intrinsic_num_components(decl); + int num_array_elems = nir_intrinsic_num_array_elems(decl); + if (indir_src != NULL) { + LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, base); + LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, num_array_elems - 1); + indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, ""); + indirect_val = lp_build_min(uint_bld, indirect_val, max_index); reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), ""); for (unsigned i = 0; i < nc; i++) { if (!(writemask & (1 << i))) continue; - LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE); + LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, true); dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, ""); emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask); } @@ -712,10 +917,12 @@ static void emit_store_reg(struct lp_build_nir_context *bld_base, } for (unsigned i = 0; i < nc; i++) { - LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage, - lp_build_const_int32(gallivm, i)); + if (!(writemask & (1 << i))) + continue; dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, ""); - lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage); + lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], + reg_chan_pointer(bld_base, reg_bld, decl, reg_storage, + base, i)); } } @@ -741,14 +948,16 @@ static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base, kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, ""); if (offset_is_uniform) { - offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), ""); + offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), ""); for (unsigned c = 0; c < nc; c++) { LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), ""); - LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset); + LLVMValueRef scalar = lp_build_pointer_get2(builder, bld_broad->elem_type, kernel_args_ptr, this_offset); result[c] = lp_build_broadcast_scalar(bld_broad, scalar); } + } else { + unreachable("load_kernel_arg must have a uniform offset."); } } @@ -773,10 +982,49 @@ static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueR return addr_ptr; } +static LLVMValueRef global_addr_to_ptr_vec(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned length, unsigned bit_size) +{ + LLVMBuilderRef builder = gallivm->builder; + switch (bit_size) { + case 8: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), length), ""); + break; + case 16: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), length), ""); + break; + case 32: + default: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), length), ""); + break; + case 64: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), length), ""); + break; + } + return addr_ptr; +} + +static LLVMValueRef lp_vec_add_offset_ptr(struct lp_build_nir_context *bld_base, + unsigned bit_size, + LLVMValueRef ptr, + LLVMValueRef offset) +{ + unsigned pointer_size = 8 * sizeof(void *); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + struct lp_build_context *ptr_bld = get_int_bld(bld_base, true, pointer_size); + LLVMValueRef result = LLVMBuildPtrToInt(builder, ptr, ptr_bld->vec_type, ""); + if (pointer_size == 64) + offset = LLVMBuildZExt(builder, offset, ptr_bld->vec_type, ""); + result = LLVMBuildAdd(builder, offset, result, ""); + return global_addr_to_ptr_vec(gallivm, result, uint_bld->type.length, bit_size); +} + static void emit_load_global(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, unsigned addr_bit_size, + bool offset_is_uniform, LLVMValueRef addr, LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS]) { @@ -784,34 +1032,38 @@ static void emit_load_global(struct lp_build_nir_context *bld_base, LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *uint_bld = &bld_base->uint_bld; struct lp_build_context *res_bld; + LLVMValueRef exec_mask = mask_vec(bld_base); res_bld = get_int_bld(bld_base, true, bit_size); - for (unsigned c = 0; c < nc; c++) { - LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, ""); - LLVMValueRef exec_mask = mask_vec(bld_base); - struct lp_build_loop_state loop_state; - lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); - - struct lp_build_if_state ifthen; - LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); - cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); - lp_build_if(&ifthen, gallivm, cond); - + /* Note, we don't use first_active_invocation here, since we aren't + * guaranteed that there is actually an active invocation. + */ + if (offset_is_uniform && invocation_0_must_be_active(bld_base)) { + /* If the offset is uniform, then use the address from invocation 0 to + * load, and broadcast to all invocations. + */ LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr, - loop_state.counter, ""); + lp_build_const_int32(gallivm, 0), ""); addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size); - LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c)); + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef scalar = lp_build_pointer_get2(builder, res_bld->elem_type, + addr_ptr, lp_build_const_int32(gallivm, c)); + outval[c] = lp_build_broadcast_scalar(res_bld, scalar); + } + return; + } - LLVMValueRef temp_res; - temp_res = LLVMBuildLoad(builder, result, ""); - temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, ""); - LLVMBuildStore(builder, temp_res, result); - lp_build_endif(&ifthen); - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), - NULL, LLVMIntUGE); - outval[c] = LLVMBuildLoad(builder, result, ""); + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)); + + outval[c] = lp_build_masked_gather(gallivm, res_bld->type.length, + bit_size, + res_bld->vec_type, + lp_vec_add_offset_ptr(bld_base, bit_size, addr, chan_offset), + exec_mask); + outval[c] = LLVMBuildBitCast(builder, outval[c], res_bld->vec_type, ""); } } @@ -825,51 +1077,25 @@ static void emit_store_global(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMValueRef exec_mask = mask_vec(bld_base); for (unsigned c = 0; c < nc; c++) { if (!(writemask & (1u << c))) continue; LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, ""); - - LLVMValueRef exec_mask = mask_vec(bld_base); - struct lp_build_loop_state loop_state; - lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); - LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, - loop_state.counter, ""); - - LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr, - loop_state.counter, ""); - addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size); - switch (bit_size) { - case 8: - value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt8TypeInContext(gallivm->context), ""); - break; - case 16: - value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt16TypeInContext(gallivm->context), ""); - break; - case 32: - value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), ""); - break; - case 64: - value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), ""); - break; - default: - break; - } - struct lp_build_if_state ifthen; - - LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); - cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); - lp_build_if(&ifthen, gallivm, cond); - lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr); - lp_build_endif(&ifthen); - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), - NULL, LLVMIntUGE); + LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)); + + struct lp_build_context *out_bld = get_int_bld(bld_base, false, bit_size); + val = LLVMBuildBitCast(builder, val, out_bld->vec_type, ""); + lp_build_masked_scatter(gallivm, out_bld->type.length, bit_size, + lp_vec_add_offset_ptr(bld_base, bit_size, + addr, chan_offset), + val, exec_mask); } } static void emit_atomic_global(struct lp_build_nir_context *bld_base, - nir_intrinsic_op nir_op, + nir_atomic_op nir_op, unsigned addr_bit_size, unsigned val_bit_size, LLVMValueRef addr, @@ -879,15 +1105,20 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *uint_bld = &bld_base->uint_bld; - struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size); + bool is_flt = nir_atomic_op_type(nir_op) == nir_type_float; + struct lp_build_context *atom_bld = is_flt ? get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size); + if (is_flt) + val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, ""); + LLVMValueRef atom_res = lp_build_alloca(gallivm, - LLVMTypeOf(val), ""); + atom_bld->vec_type, ""); LLVMValueRef exec_mask = mask_vec(bld_base); struct lp_build_loop_state loop_state; lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, loop_state.counter, ""); + value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atom_bld->elem_type, ""); LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr, loop_state.counter, ""); @@ -900,7 +1131,7 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base, lp_build_if(&ifthen, gallivm, cond); addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), ""); - if (nir_op == nir_intrinsic_global_atomic_comp_swap) { + if (val2 != NULL /* compare-and-swap */) { LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2, loop_state.counter, ""); cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, ""); @@ -911,70 +1142,57 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base, false); scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, ""); } else { - LLVMAtomicRMWBinOp op; - switch (nir_op) { - case nir_intrinsic_global_atomic_add: - op = LLVMAtomicRMWBinOpAdd; - break; - case nir_intrinsic_global_atomic_exchange: - - op = LLVMAtomicRMWBinOpXchg; - break; - case nir_intrinsic_global_atomic_and: - op = LLVMAtomicRMWBinOpAnd; - break; - case nir_intrinsic_global_atomic_or: - op = LLVMAtomicRMWBinOpOr; - break; - case nir_intrinsic_global_atomic_xor: - op = LLVMAtomicRMWBinOpXor; - break; - case nir_intrinsic_global_atomic_umin: - op = LLVMAtomicRMWBinOpUMin; - break; - case nir_intrinsic_global_atomic_umax: - op = LLVMAtomicRMWBinOpUMax; - break; - case nir_intrinsic_global_atomic_imin: - op = LLVMAtomicRMWBinOpMin; - break; - case nir_intrinsic_global_atomic_imax: - op = LLVMAtomicRMWBinOpMax; - break; - default: - unreachable("unknown atomic op"); - } - - scalar = LLVMBuildAtomicRMW(builder, op, + scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op), addr_ptr, value_ptr, LLVMAtomicOrderingSequentiallyConsistent, false); } - temp_res = LLVMBuildLoad(builder, atom_res, ""); + temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, ""); temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); LLVMBuildStore(builder, temp_res, atom_res); lp_build_else(&ifthen); - temp_res = LLVMBuildLoad(builder, atom_res, ""); - bool is_float = LLVMTypeOf(val) == bld_base->base.vec_type; - LLVMValueRef zero_val; - if (is_float) { - if (val_bit_size == 64) - zero_val = lp_build_const_double(gallivm, 0); - else - zero_val = lp_build_const_float(gallivm, 0); - } else { - if (val_bit_size == 64) - zero_val = lp_build_const_int64(gallivm, 0); - else - zero_val = lp_build_const_int32(gallivm, 0); - } - + temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, ""); + LLVMValueRef zero_val = lp_build_zero_bits(gallivm, val_bit_size, is_flt); temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, ""); LLVMBuildStore(builder, temp_res, atom_res); lp_build_endif(&ifthen); lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), NULL, LLVMIntUGE); - *result = LLVMBuildLoad(builder, atom_res, ""); + *result = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, ""); +} + +/* Returns a boolean for whether the offset is in range of the given limit for + * SSBO/UBO dereferences. + */ +static LLVMValueRef +lp_offset_in_range(struct lp_build_nir_context *bld_base, + LLVMValueRef offset, + LLVMValueRef limit) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + + LLVMValueRef fetch_extent = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, 1), ""); + LLVMValueRef fetch_in_bounds = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, limit, fetch_extent, ""); + LLVMValueRef fetch_non_negative = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, offset, lp_build_const_int32(gallivm, 0), ""); + return LLVMBuildAnd(gallivm->builder, fetch_in_bounds, fetch_non_negative, ""); +} + +static LLVMValueRef +build_resource_to_scalar(struct lp_build_nir_context *bld_base, LLVMValueRef resource) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + + LLVMValueRef invocation = first_active_invocation(bld_base); + + LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, resource, 0, ""); + set = LLVMBuildExtractElement(gallivm->builder, set, invocation, ""); + + LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, resource, 1, ""); + binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, ""); + + LLVMValueRef components[2] = { set, binding }; + return lp_nir_array_build_gather_values(gallivm->builder, components, 2); } static void emit_load_ubo(struct lp_build_nir_context *bld_base, @@ -985,12 +1203,16 @@ static void emit_load_ubo(struct lp_build_nir_context *bld_base, LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { + if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind) + index = build_resource_to_scalar(bld_base, index); + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *uint_bld = &bld_base->uint_bld; struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size); - LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index); + LLVMValueRef consts_ptr = lp_llvm_buffer_base(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS); + LLVMValueRef num_consts = lp_llvm_buffer_num_elements(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS); unsigned size_shift = bit_size_to_shift_size(bit_size); if (size_shift) offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift)); @@ -999,17 +1221,41 @@ static void emit_load_ubo(struct lp_build_nir_context *bld_base, consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, ""); if (offset_is_uniform) { - offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), ""); - + offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), ""); + struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size); + switch (bit_size) { + case 8: + num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), ""); + break; + case 16: + num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), ""); + break; + case 64: + num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), ""); + break; + default: break; + } for (unsigned c = 0; c < nc; c++) { - LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), ""); + LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), ""); - LLVMValueRef scalar = lp_build_pointer_get(builder, consts_ptr, this_offset); - result[c] = lp_build_broadcast_scalar(bld_broad, scalar); + LLVMValueRef scalar; + /* If loading outside the UBO, we need to skip the load and read 0 instead. */ + LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false); + LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), ""); + LLVMBuildStore(builder, zero, res_store); + + struct lp_build_if_state ifthen; + lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, num_consts)); + LLVMBuildStore(builder, lp_build_pointer_get2(builder, bld_broad->elem_type, + consts_ptr, chan_offset), res_store); + lp_build_endif(&ifthen); + + scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, ""); + + result[c] = lp_build_broadcast_scalar(load_bld, scalar); } } else { LLVMValueRef overflow_mask; - LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index); num_consts = lp_build_broadcast_scalar(uint_bld, num_consts); if (bit_size == 64) @@ -1023,15 +1269,108 @@ static void emit_load_ubo(struct lp_build_nir_context *bld_base, LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c)); overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL, this_offset, num_consts); - result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL); + result[c] = build_gather(bld_base, bld_broad, bld_broad->elem_type, consts_ptr, this_offset, overflow_mask, NULL); } } } +static void +emit_load_const(struct lp_build_nir_context *bld_base, + const nir_load_const_instr *instr, + LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS]) +{ + struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size); + const unsigned bits = instr->def.bit_size; + + for (unsigned i = 0; i < instr->def.num_components; i++) { + outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, + bits == 32 ? instr->value[i].u32 + : instr->value[i].u64); + } + for (unsigned i = instr->def.num_components; i < NIR_MAX_VEC_COMPONENTS; i++) { + outval[i] = NULL; + } +} + +/** + * Get the base address of SSBO[@index] for the @invocation channel, returning + * the address and also the bounds (in units of the bit_size). + */ +static LLVMValueRef +ssbo_base_pointer(struct lp_build_nir_context *bld_base, + unsigned bit_size, + LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + uint32_t shift_val = bit_size_to_shift_size(bit_size); + + LLVMValueRef ssbo_idx; + LLVMValueRef buffers; + uint32_t buffers_limit; + if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind) { + LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, index, 0, ""); + set = LLVMBuildExtractElement(gallivm->builder, set, invocation, ""); + + LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, index, 1, ""); + binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, ""); + + LLVMValueRef components[2] = { set, binding }; + ssbo_idx = lp_nir_array_build_gather_values(gallivm->builder, components, 2); + + buffers = bld->consts_ptr; + buffers_limit = LP_MAX_TGSI_CONST_BUFFERS; + } else { + ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, ""); + + buffers = bld->ssbo_ptr; + buffers_limit = LP_MAX_TGSI_SHADER_BUFFERS; + } + + LLVMValueRef ssbo_size_ptr = lp_llvm_buffer_num_elements(gallivm, buffers, ssbo_idx, buffers_limit); + LLVMValueRef ssbo_ptr = lp_llvm_buffer_base(gallivm, buffers, ssbo_idx, buffers_limit); + if (bounds) + *bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), ""); + + return ssbo_ptr; +} + +static LLVMValueRef +mem_access_base_pointer(struct lp_build_nir_context *bld_base, + struct lp_build_context *mem_bld, + unsigned bit_size, bool payload, + LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + LLVMValueRef ptr; + + if (index) { + ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds); + } else { + if (payload) { + ptr = bld->payload_ptr; + ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, ""); + ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), ""); + ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), ""); + } + else + ptr = bld->shared_ptr; + *bounds = NULL; + } + + /* Cast it to the pointer type of the access this instruction is doing. */ + if (bit_size == 32 && !mem_bld->type.floating) + return ptr; + else + return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), ""); +} static void emit_load_mem(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, + bool index_and_offset_are_uniform, + bool payload, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS]) @@ -1040,7 +1379,6 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; struct lp_build_context *uint_bld = &bld_base->uint_bld; - LLVMValueRef ssbo_limit = NULL; struct lp_build_context *load_bld; uint32_t shift_val = bit_size_to_shift_size(bit_size); @@ -1048,6 +1386,45 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), ""); + /* If the address is uniform, then use the address from the first active + * invocation 0 to load, and broadcast to all invocations. We can't do + * computed first active invocation for shared accesses (index == NULL), + * though, since those don't do bounds checking and we could use an invalid + * offset if exec_mask == 0. + */ + if (index_and_offset_are_uniform && (invocation_0_must_be_active(bld_base) || index)) { + LLVMValueRef ssbo_limit; + LLVMValueRef first_active = first_active_invocation(bld_base); + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index, + first_active, &ssbo_limit); + + offset = LLVMBuildExtractElement(gallivm->builder, offset, first_active, ""); + + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), ""); + + LLVMValueRef scalar; + /* If loading outside the SSBO, we need to skip the load and read 0 instead. */ + if (ssbo_limit) { + LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false); + LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), ""); + LLVMBuildStore(builder, zero, res_store); + + struct lp_build_if_state ifthen; + lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit)); + LLVMBuildStore(builder, lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset), res_store); + lp_build_endif(&ifthen); + + scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, ""); + } else { + scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset); + } + + outval[c] = lp_build_broadcast_scalar(load_bld, scalar); + } + return; + } + /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */ LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]; @@ -1056,70 +1433,50 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, LLVMValueRef exec_mask = mask_vec(bld_base); LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); - struct lp_build_loop_state loop_state; - lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); - LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); - LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, ""); + for (unsigned i = 0; i < uint_bld->type.length; i++) { + LLVMValueRef counter = lp_build_const_int32(gallivm, i); + LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, ""); - struct lp_build_if_state exec_ifthen; - lp_build_if(&exec_ifthen, gallivm, loop_cond); + struct lp_build_if_state exec_ifthen; + lp_build_if(&exec_ifthen, gallivm, loop_cond); - LLVMValueRef mem_ptr; + LLVMValueRef ssbo_limit; + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index, + counter, &ssbo_limit); - if (index) { - LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, ""); - LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx); - LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx); - ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), ""); - mem_ptr = ssbo_ptr; - } else - mem_ptr = bld->shared_ptr; + LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, ""); - for (unsigned c = 0; c < nc; c++) { - LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), ""); - LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1); - if (ssbo_limit) { - LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit); - do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, ""); - } + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), ""); + LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1); + if (ssbo_limit) { + LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit); + do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, ""); + } - struct lp_build_if_state ifthen; - LLVMValueRef fetch_cond, temp_res; + struct lp_build_if_state ifthen; + LLVMValueRef fetch_cond, temp_res; - fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), ""); + fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), ""); - lp_build_if(&ifthen, gallivm, fetch_cond); - LLVMValueRef scalar; - if (bit_size != 32) { - LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(load_bld->elem_type, 0), ""); - scalar = lp_build_pointer_get(builder, mem_ptr2, loop_index); - } else - scalar = lp_build_pointer_get(builder, mem_ptr, loop_index); + lp_build_if(&ifthen, gallivm, fetch_cond); + LLVMValueRef scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, loop_index); - temp_res = LLVMBuildLoad(builder, result[c], ""); - temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); - LLVMBuildStore(builder, temp_res, result[c]); - lp_build_else(&ifthen); - temp_res = LLVMBuildLoad(builder, result[c], ""); - LLVMValueRef zero; - if (bit_size == 64) - zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0); - else if (bit_size == 16) - zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0); - else if (bit_size == 8) - zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0); - else - zero = lp_build_const_int32(gallivm, 0); - temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, ""); - LLVMBuildStore(builder, temp_res, result[c]); - lp_build_endif(&ifthen); - } + temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], ""); + temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, ""); + LLVMBuildStore(builder, temp_res, result[c]); + lp_build_else(&ifthen); + temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], ""); + LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false); + temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, ""); + LLVMBuildStore(builder, temp_res, result[c]); + lp_build_endif(&ifthen); + } - lp_build_endif(&exec_ifthen); - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), - NULL, LLVMIntUGE); + lp_build_endif(&exec_ifthen); + } for (unsigned c = 0; c < nc; c++) - outval[c] = LLVMBuildLoad(gallivm->builder, result[c], ""); + outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], ""); } @@ -1127,6 +1484,8 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base, unsigned writemask, unsigned nc, unsigned bit_size, + bool index_and_offset_are_uniform, + bool payload, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef dst) @@ -1134,71 +1493,98 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; - LLVMValueRef mem_ptr; struct lp_build_context *uint_bld = &bld_base->uint_bld; - LLVMValueRef ssbo_limit = NULL; struct lp_build_context *store_bld; uint32_t shift_val = bit_size_to_shift_size(bit_size); store_bld = get_int_bld(bld_base, true, bit_size); offset = lp_build_shr_imm(uint_bld, offset, shift_val); + /* If the address is uniform, then just store the value from the first + * channel instead of making LLVM unroll the invocation loop. Note that we + * don't use first_active_uniform(), since we aren't guaranteed that there is + * actually an active invocation. + */ + if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) { + LLVMValueRef ssbo_limit; + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index, + lp_build_const_int32(gallivm, 0), &ssbo_limit); + + offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), ""); + + for (unsigned c = 0; c < nc; c++) { + if (!(writemask & (1u << c))) + continue; + + /* Pick out invocation 0's value. */ + LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, ""); + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, + lp_build_const_int32(gallivm, 0), ""); + value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, ""); + + LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), ""); + + /* If storing outside the SSBO, we need to skip the store instead. */ + if (ssbo_limit) { + struct lp_build_if_state ifthen; + lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit)); + lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr); + lp_build_endif(&ifthen); + } else { + lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr); + } + } + return; + } + LLVMValueRef exec_mask = mask_vec(bld_base); LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); - struct lp_build_loop_state loop_state; - lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); - LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); - LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, ""); + for (unsigned i = 0; i < uint_bld->type.length; i++) { + LLVMValueRef counter = lp_build_const_int32(gallivm, i); + LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, ""); - struct lp_build_if_state exec_ifthen; - lp_build_if(&exec_ifthen, gallivm, loop_cond); + struct lp_build_if_state exec_ifthen; + lp_build_if(&exec_ifthen, gallivm, loop_cond); - if (index) { - LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, ""); - LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx); - LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx); - ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), ""); - mem_ptr = ssbo_ptr; - } else - mem_ptr = bld->shared_ptr; + LLVMValueRef ssbo_limit; + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index, + counter, &ssbo_limit); - for (unsigned c = 0; c < nc; c++) { - if (!(writemask & (1u << c))) - continue; - LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), ""); - LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, ""); - LLVMValueRef do_store = lp_build_const_int32(gallivm, -1); + LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, ""); - if (ssbo_limit) { - LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit); - do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, ""); - } + for (unsigned c = 0; c < nc; c++) { + if (!(writemask & (1u << c))) + continue; + LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), ""); + LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, ""); + LLVMValueRef do_store = lp_build_const_int32(gallivm, -1); - LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, - loop_state.counter, ""); - value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, ""); - struct lp_build_if_state ifthen; - LLVMValueRef store_cond; + if (ssbo_limit) { + LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit); + do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, ""); + } - store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), ""); - lp_build_if(&ifthen, gallivm, store_cond); - if (bit_size != 32) { - LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(store_bld->elem_type, 0), ""); - lp_build_pointer_set(builder, mem_ptr2, loop_index, value_ptr); - } else - lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr); - lp_build_endif(&ifthen); - } + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, + counter, ""); + value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, ""); + struct lp_build_if_state ifthen; + LLVMValueRef store_cond; - lp_build_endif(&exec_ifthen); - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), - NULL, LLVMIntUGE); + store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), ""); + lp_build_if(&ifthen, gallivm, store_cond); + lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr); + lp_build_endif(&ifthen); + } + lp_build_endif(&exec_ifthen); + } } + static void emit_atomic_mem(struct lp_build_nir_context *bld_base, - nir_intrinsic_op nir_op, + nir_atomic_op nir_op, uint32_t bit_size, + bool payload, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef val, LLVMValueRef val2, LLVMValueRef *result) @@ -1207,9 +1593,9 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; struct lp_build_context *uint_bld = &bld_base->uint_bld; - LLVMValueRef ssbo_limit = NULL; uint32_t shift_val = bit_size_to_shift_size(bit_size); - struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size); + bool is_float = nir_atomic_op_type(nir_op) == nir_type_float; + struct lp_build_context *atomic_bld = is_float ? get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size); offset = lp_build_shr_imm(uint_bld, offset, shift_val); LLVMValueRef atom_res = lp_build_alloca(gallivm, @@ -1217,120 +1603,67 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, LLVMValueRef exec_mask = mask_vec(bld_base); LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); - struct lp_build_loop_state loop_state; - lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); - LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); - LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, ""); - - struct lp_build_if_state exec_ifthen; - lp_build_if(&exec_ifthen, gallivm, loop_cond); + for (unsigned i = 0; i < uint_bld->type.length; i++) { + LLVMValueRef counter = lp_build_const_int32(gallivm, i); + LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, ""); - LLVMValueRef mem_ptr; - if (index) { - LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, ""); - LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx); - LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx); - ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), ""); - mem_ptr = ssbo_ptr; - } else - mem_ptr = bld->shared_ptr; + struct lp_build_if_state exec_ifthen; + lp_build_if(&exec_ifthen, gallivm, loop_cond); - LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1); - if (ssbo_limit) { - LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit); - do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, ""); - } + LLVMValueRef ssbo_limit; + LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index, + counter, &ssbo_limit); - LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, - loop_state.counter, ""); - value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, ""); + LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, ""); - LLVMValueRef scalar_ptr; - if (bit_size != 32) { - LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(atomic_bld->elem_type, 0), ""); - scalar_ptr = LLVMBuildGEP(builder, mem_ptr2, &loop_offset, 1, ""); - } else - scalar_ptr = LLVMBuildGEP(builder, mem_ptr, &loop_offset, 1, ""); + LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1); + if (ssbo_limit) { + LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit); + do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, ""); + } - struct lp_build_if_state ifthen; - LLVMValueRef inner_cond, temp_res; - LLVMValueRef scalar; + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, + counter, ""); + value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, ""); - inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), ""); - lp_build_if(&ifthen, gallivm, inner_cond); + LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, ""); - if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) { - LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2, - loop_state.counter, ""); - cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, ""); - scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr, - cas_src_ptr, - LLVMAtomicOrderingSequentiallyConsistent, - LLVMAtomicOrderingSequentiallyConsistent, - false); - scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, ""); - } else { - LLVMAtomicRMWBinOp op; + struct lp_build_if_state ifthen; + LLVMValueRef inner_cond, temp_res; + LLVMValueRef scalar; - switch (nir_op) { - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_ssbo_atomic_add: - op = LLVMAtomicRMWBinOpAdd; - break; - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_ssbo_atomic_exchange: - op = LLVMAtomicRMWBinOpXchg; - break; - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_ssbo_atomic_and: - op = LLVMAtomicRMWBinOpAnd; - break; - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_ssbo_atomic_or: - op = LLVMAtomicRMWBinOpOr; - break; - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_ssbo_atomic_xor: - op = LLVMAtomicRMWBinOpXor; - break; - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_ssbo_atomic_umin: - op = LLVMAtomicRMWBinOpUMin; - break; - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_ssbo_atomic_umax: - op = LLVMAtomicRMWBinOpUMax; - break; - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_shared_atomic_imin: - op = LLVMAtomicRMWBinOpMin; - break; - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_shared_atomic_imax: - op = LLVMAtomicRMWBinOpMax; - break; - default: - unreachable("unknown atomic op"); + inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), ""); + lp_build_if(&ifthen, gallivm, inner_cond); + + if (val2 != NULL) { + LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2, + counter, ""); + cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, ""); + scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr, + cas_src_ptr, + LLVMAtomicOrderingSequentiallyConsistent, + LLVMAtomicOrderingSequentiallyConsistent, + false); + scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, ""); + } else { + scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op), + scalar_ptr, value_ptr, + LLVMAtomicOrderingSequentiallyConsistent, + false); } - scalar = LLVMBuildAtomicRMW(builder, op, - scalar_ptr, value_ptr, - LLVMAtomicOrderingSequentiallyConsistent, - false); - } - temp_res = LLVMBuildLoad(builder, atom_res, ""); - temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); - LLVMBuildStore(builder, temp_res, atom_res); - lp_build_else(&ifthen); - temp_res = LLVMBuildLoad(builder, atom_res, ""); - LLVMValueRef zero = bit_size == 64 ? lp_build_const_int64(gallivm, 0) : lp_build_const_int32(gallivm, 0); - temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, ""); - LLVMBuildStore(builder, temp_res, atom_res); - lp_build_endif(&ifthen); + temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, ""); + temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, ""); + LLVMBuildStore(builder, temp_res, atom_res); + lp_build_else(&ifthen); + temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, ""); + LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float); + temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, ""); + LLVMBuildStore(builder, temp_res, atom_res); + lp_build_endif(&ifthen); - lp_build_endif(&exec_ifthen); - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), - NULL, LLVMIntUGE); - *result = LLVMBuildLoad(builder, atom_res, ""); + lp_build_endif(&exec_ifthen); + } + *result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, ""); } static void emit_barrier(struct lp_build_nir_context *bld_base) @@ -1347,13 +1680,12 @@ static void emit_barrier(struct lp_build_nir_context *bld_base) static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base, LLVMValueRef index) { - struct gallivm_state *gallivm = bld_base->base.gallivm; - struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; - LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; struct lp_build_context *bld_broad = &bld_base->uint_bld; - LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr, - LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); - return lp_build_broadcast_scalar(bld_broad, size_ptr); + + LLVMValueRef size; + ssbo_base_pointer(bld_base, 8, index, first_active_invocation(bld_base), &size); + + return lp_build_broadcast_scalar(bld_broad, size); } static void emit_image_op(struct lp_build_nir_context *bld_base, @@ -1363,13 +1695,18 @@ static void emit_image_op(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; params->type = bld_base->base.type; - params->context_ptr = bld->context_ptr; + params->resources_type = bld->resources_type; + params->resources_ptr = bld->resources_ptr; + params->thread_data_type = bld->thread_data_type; params->thread_data_ptr = bld->thread_data_ptr; params->exec_mask = mask_vec(bld_base); if (params->image_index_offset) params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset, - lp_build_const_int32(gallivm, 0), ""); + first_active_invocation(bld_base), ""); + + if (params->resource) + params->resource = build_resource_to_scalar(bld_base, params->resource); bld->image->emit_op(bld->image, bld->bld_base.base.gallivm, @@ -1384,11 +1721,11 @@ static void emit_image_size(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; params->int_type = bld_base->int_bld.type; - params->context_ptr = bld->context_ptr; - + params->resources_type = bld->resources_type; + params->resources_ptr = bld->resources_ptr; if (params->texture_unit_offset) params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset, - lp_build_const_int32(gallivm, 0), ""); + first_active_invocation(bld_base), ""); bld->image->emit_size_query(bld->image, bld->bld_base.base.gallivm, params); @@ -1440,8 +1777,11 @@ static void emit_tex(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; params->type = bld_base->base.type; - params->context_ptr = bld->context_ptr; + params->resources_type = bld->resources_type; + params->resources_ptr = bld->resources_ptr; + params->thread_data_type = bld->thread_data_type; params->thread_data_ptr = bld->thread_data_ptr; + params->exec_mask = mask_vec(bld_base); if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) { /* this is horrible but this can be dynamic */ @@ -1491,10 +1831,16 @@ static void emit_tex(struct lp_build_nir_context *bld_base, return; } - if (params->texture_index_offset) - params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder, - params->texture_index_offset, - lp_build_const_int32(bld_base->base.gallivm, 0), ""); + if (params->texture_index_offset) { + params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset, + first_active_invocation(bld_base), ""); + } + + if (params->texture_resource) + params->texture_resource = build_resource_to_scalar(bld_base, params->texture_resource); + + if (params->sampler_resource) + params->sampler_resource = build_resource_to_scalar(bld_base, params->sampler_resource); params->type = bld_base->base.type; bld->sampler->emit_tex_sample(bld->sampler, @@ -1508,24 +1854,45 @@ static void emit_tex_size(struct lp_build_nir_context *bld_base, struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; params->int_type = bld_base->int_bld.type; - params->context_ptr = bld->context_ptr; - + params->resources_type = bld->resources_type; + params->resources_ptr = bld->resources_ptr; if (params->texture_unit_offset) params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder, params->texture_unit_offset, lp_build_const_int32(bld_base->base.gallivm, 0), ""); + + params->exec_mask = mask_vec(bld_base); + if (params->resource) + params->resource = build_resource_to_scalar(bld_base, params->resource); + bld->sampler->emit_size_query(bld->sampler, bld->bld_base.base.gallivm, params); } +static LLVMValueRef get_local_invocation_index(struct lp_build_nir_soa_context *bld) +{ + struct lp_build_nir_context *bld_base = &bld->bld_base; + LLVMValueRef tmp, tmp2; + + tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[1]); + tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[0]); + tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2); + tmp = lp_build_mul(&bld_base->uint_bld, tmp, bld->system_values.thread_id[2]); + + tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, bld->system_values.thread_id[1]); + tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2); + tmp = lp_build_add(&bld_base->uint_bld, tmp, bld->system_values.thread_id[0]); + return tmp; +} + static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; - struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->dest.ssa.bit_size); + struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->def.bit_size); switch (instr->intrinsic) { case nir_intrinsic_load_instance_id: result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id); @@ -1548,37 +1915,22 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, case nir_intrinsic_load_workgroup_id: { LLVMValueRef tmp[3]; for (unsigned i = 0; i < 3; i++) { - tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), ""); - if (instr->dest.ssa.bit_size == 64) - tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, ""); + tmp[i] = bld->system_values.block_id[i]; result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]); } break; } case nir_intrinsic_load_local_invocation_id: for (unsigned i = 0; i < 3; i++) - result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, ""); + result[i] = bld->system_values.thread_id[i]; break; - case nir_intrinsic_load_local_invocation_index: { - LLVMValueRef tmp, tmp2; - tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 1), "")); - tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), "")); - tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2); - tmp = lp_build_mul(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 2, "")); - - tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), "")); - tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 1, "")); - tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2); - tmp = lp_build_add(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 0, "")); - result[0] = tmp; + case nir_intrinsic_load_local_invocation_index: + result[0] = get_local_invocation_index(bld); break; - } case nir_intrinsic_load_num_workgroups: { LLVMValueRef tmp[3]; for (unsigned i = 0; i < 3; i++) { - tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), ""); - if (instr->dest.ssa.bit_size == 64) - tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, ""); + tmp[i] = bld->system_values.grid_size[i]; result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]); } break; @@ -1599,7 +1951,7 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, break; case nir_intrinsic_load_workgroup_size: for (unsigned i = 0; i < 3; i++) - result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), "")); + result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[i]); break; case nir_intrinsic_load_work_dim: result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim); @@ -1627,7 +1979,8 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, for (unsigned i = 0; i < 2; i++) { LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), ""); idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), ""); - LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx); + LLVMValueRef val = lp_build_array_get2(gallivm, bld->system_values.sample_pos_type, + bld->system_values.sample_pos, idx); result[i] = lp_build_broadcast_scalar(&bld_base->base, val); } break; @@ -1670,7 +2023,7 @@ static void bgnloop(struct lp_build_nir_context *bld_base) static void endloop(struct lp_build_nir_context *bld_base) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; - lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask); + lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask); } static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond) @@ -1734,7 +2087,7 @@ increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base, LLVMValueRef mask) { LLVMBuilderRef builder = bld_base->base.gallivm->builder; - LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, ""); + LLVMValueRef current_vec = LLVMBuildLoad2(builder, LLVMTypeOf(mask), ptr, ""); current_vec = LLVMBuildSub(builder, current_vec, mask, ""); @@ -1747,7 +2100,7 @@ clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base, LLVMValueRef mask) { LLVMBuilderRef builder = bld_base->base.gallivm->builder; - LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, ""); + LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, ""); current_vec = lp_build_select(&bld_base->uint_bld, mask, @@ -1780,7 +2133,7 @@ static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_i return; assert(bld->gs_iface->emit_vertex); LLVMValueRef total_emitted_vertices_vec = - LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], ""); + LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], ""); LLVMValueRef mask = mask_vec(bld_base); mask = clamp_mask_to_max_output_vertices(bld, mask, total_emitted_vertices_vec); @@ -1807,11 +2160,11 @@ end_primitive_masked(struct lp_build_nir_context * bld_base, return; struct lp_build_context *uint_bld = &bld_base->uint_bld; LLVMValueRef emitted_vertices_vec = - LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], ""); + LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_vertices_vec_ptr[stream_id], ""); LLVMValueRef emitted_prims_vec = - LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], ""); + LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_prims_vec_ptr[stream_id], ""); LLVMValueRef total_emitted_vertices_vec = - LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], ""); + LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], ""); LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, @@ -1842,7 +2195,10 @@ emit_prologue(struct lp_build_nir_soa_context *bld) { struct gallivm_state * gallivm = bld->bld_base.base.gallivm; if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) { - uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read); + uint32_t num_inputs = bld->num_inputs; + /* If this is an indirect case, the number of inputs should not be 0 */ + assert(num_inputs > 0); + unsigned index, chan; LLVMTypeRef vec_type = bld->bld_base.base.vec_type; LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4); @@ -1855,8 +2211,7 @@ emit_prologue(struct lp_build_nir_soa_context *bld) LLVMValueRef lindex = lp_build_const_int32(gallivm, index * 4 + chan); LLVMValueRef input_ptr = - LLVMBuildGEP(gallivm->builder, bld->inputs_array, - &lindex, 1, ""); + LLVMBuildGEP2(gallivm->builder, vec_type, bld->inputs_array, &lindex, 1, ""); LLVMValueRef value = bld->inputs[index][chan]; if (value) LLVMBuildStore(gallivm->builder, value, input_ptr); @@ -1893,7 +2248,7 @@ static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, lp_build_endif(&ifthen); lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length), NULL, LLVMIntUGE); - init_val = LLVMBuildLoad(builder, eq_store, ""); + init_val = LLVMBuildLoad2(builder, get_int_bld(bld_base, true, bit_size)->elem_type, eq_store, ""); } else { LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store); } @@ -1907,7 +2262,7 @@ static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, ""); lp_build_if(&ifthen, gallivm, if_cond); - res = LLVMBuildLoad(builder, res_store, ""); + res = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, ""); if (instr->intrinsic == nir_intrinsic_vote_feq) { struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size); @@ -1928,7 +2283,8 @@ static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, lp_build_endif(&ifthen); lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length), NULL, LLVMIntUGE); - result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, "")); + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, + LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "")); } static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4]) @@ -1943,7 +2299,7 @@ static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, ""); - res = LLVMBuildLoad(builder, res_store, ""); + res = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, ""); res = LLVMBuildOr(builder, res, LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), ""); @@ -1951,7 +2307,8 @@ static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length), NULL, LLVMIntUGE); - result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, "")); + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, + LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, "")); } static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4]) @@ -1972,7 +2329,7 @@ static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef resul lp_build_const_int32(gallivm, -1), ""); LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, - LLVMBuildLoad(builder, found_store, ""), + LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, found_store, ""), lp_build_const_int32(gallivm, 0), ""); cond = LLVMBuildAnd(builder, cond, cond2, ""); @@ -1986,10 +2343,51 @@ static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef resul result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero, lp_build_const_int32(gallivm, -1), - LLVMBuildLoad(builder, idx_store, ""), + LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, idx_store, ""), ""); } +#if LLVM_VERSION_MAJOR >= 10 +static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index, + nir_intrinsic_instr *instr, LLVMValueRef result[4]) +{ + assert(instr->intrinsic == nir_intrinsic_shuffle); + + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + uint32_t bit_size = nir_src_bit_size(instr->src[0]); + uint32_t index_bit_size = nir_src_bit_size(instr->src[1]); + struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size); + + if (util_get_cpu_caps()->has_avx2 && bit_size == 32 && index_bit_size == 32 && int_bld->type.length == 8) { + /* freeze `src` in case inactive invocations contain poison */ + src = LLVMBuildFreeze(builder, src, ""); + result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd", int_bld->vec_type, src, index); + } else { + LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, ""); + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + + LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, ""); + + LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, ""); + /* freeze `src_value` in case an out-of-bounds index or an index into an + * inactive invocation results in poison + */ + src_value = LLVMBuildFreeze(builder, src_value, ""); + + LLVMValueRef res = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, ""); + res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, ""); + LLVMBuildStore(builder, res, res_store); + + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length), + NULL, LLVMIntUGE); + + result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, ""); + } +} +#endif + static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4]) { @@ -2147,9 +2545,9 @@ static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, ""); LLVMValueRef res = NULL; - LLVMValueRef scan_val = LLVMBuildLoad(gallivm->builder, scan_store, ""); + LLVMValueRef scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, ""); if (instr->intrinsic != nir_intrinsic_reduce) - res = LLVMBuildLoad(gallivm->builder, res_store, ""); + res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, ""); if (instr->intrinsic == nir_intrinsic_exclusive_scan) res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, ""); @@ -2205,9 +2603,9 @@ static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src, lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length), NULL, LLVMIntUGE); if (instr->intrinsic == nir_intrinsic_reduce) - result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad(builder, scan_store, "")); + result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad2(builder, int_bld->elem_type, scan_store, "")); else - result[0] = LLVMBuildLoad(builder, res_store, ""); + result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, ""); } static void emit_read_invocation(struct lp_build_nir_context *bld_base, @@ -2217,31 +2615,15 @@ static void emit_read_invocation(struct lp_build_nir_context *bld_base, LLVMValueRef result[4]) { struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef idx; + LLVMValueRef idx = first_active_invocation(bld_base); struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size); - if (invoc) { - idx = invoc; - idx = LLVMBuildExtractElement(gallivm->builder, idx, lp_build_const_int32(gallivm, 0), ""); - } else { - /* have to find the first active invocation */ - LLVMValueRef exec_mask = mask_vec(bld_base); - struct lp_build_loop_state loop_state; - LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, ""); - LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, ""); - lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length)); - - LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, ""); - struct lp_build_if_state ifthen; - - lp_build_if(&ifthen, gallivm, if_cond); - LLVMBuildStore(builder, loop_state.counter, res_store); - lp_build_endif(&ifthen); - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1), - lp_build_const_int32(gallivm, -1), LLVMIntEQ); - idx = LLVMBuildLoad(builder, res_store, ""); - } + /* If we're emitting readInvocation() (as opposed to readFirstInvocation), + * use the first active channel to pull the invocation index number out of + * the invocation arg. + */ + if (invoc) + idx = LLVMBuildExtractElement(gallivm->builder, invoc, idx, ""); LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, idx, ""); @@ -2268,6 +2650,65 @@ emit_interp_at(struct lp_build_nir_context *bld_base, } } +static void +emit_set_vertex_and_primitive_count(struct lp_build_nir_context *bld_base, + LLVMValueRef vert_count, + LLVMValueRef prim_count) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + assert(bld->mesh_iface); + LLVMValueRef idx = first_active_invocation(bld_base); + + LLVMValueRef vcount = LLVMBuildExtractElement(gallivm->builder, + vert_count, idx, ""); + LLVMValueRef pcount = LLVMBuildExtractElement(gallivm->builder, + prim_count, idx, ""); + + bld->mesh_iface->emit_vertex_and_primitive_count(bld->mesh_iface, &bld_base->base, vcount, pcount); +} + +static void +emit_launch_mesh_workgroups(struct lp_build_nir_context *bld_base, + LLVMValueRef launch_grid) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMTypeRef vec_type = LLVMArrayType(LLVMInt32TypeInContext(gallivm->context), 3); + + LLVMValueRef local_invoc_idx = get_local_invocation_index(bld); + + vec_type = LLVMPointerType(vec_type, 0); + + local_invoc_idx = LLVMBuildExtractElement(gallivm->builder, local_invoc_idx, lp_build_const_int32(gallivm, 0), ""); + LLVMValueRef if_cond = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, local_invoc_idx, lp_build_const_int32(gallivm, 0), ""); + struct lp_build_if_state ifthen; + lp_build_if(&ifthen, gallivm, if_cond); + LLVMValueRef ptr = bld->payload_ptr; + ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, ""); + for (unsigned i = 0; i < 3; i++) { + LLVMValueRef lg = LLVMBuildExtractValue(gallivm->builder, launch_grid, i, ""); + lg = LLVMBuildExtractElement(gallivm->builder, lg, lp_build_const_int32(gallivm, 0), ""); + LLVMValueRef this_ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), ""); + LLVMBuildStore(gallivm->builder, lg, this_ptr); + ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 4), ""); + } + lp_build_endif(&ifthen); +} + +static void +emit_call(struct lp_build_nir_context *bld_base, + struct lp_build_fn *fn, + int num_args, + LLVMValueRef *args) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + + args[0] = mask_vec(bld_base); + args[1] = bld->call_context_ptr; + LLVMBuildCall2(bld_base->base.gallivm->builder, fn->fn_type, fn->fn, args, num_args, ""); +} + static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm, struct lp_type type, unsigned scratch_size) @@ -2296,54 +2737,25 @@ emit_load_scratch(struct lp_build_nir_context *bld_base, struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; struct lp_build_context *uint_bld = &bld_base->uint_bld; struct lp_build_context *load_bld; - LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);; - uint32_t shift_val = bit_size_to_shift_size(bit_size); - + LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size); + LLVMValueRef exec_mask = mask_vec(bld_base); + LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm, + LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length), + bld->scratch_ptr); load_bld = get_int_bld(bld_base, true, bit_size); offset = lp_build_add(uint_bld, offset, thread_offsets); - offset = lp_build_shr_imm(uint_bld, offset, shift_val); - for (unsigned c = 0; c < nc; c++) { - LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c)); - LLVMValueRef exec_mask = mask_vec(bld_base); - - LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, ""); - struct lp_build_loop_state loop_state; - lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); - - struct lp_build_if_state ifthen; - LLVMValueRef cond, temp_res; - loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index, - loop_state.counter, ""); - cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); - cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); - - lp_build_if(&ifthen, gallivm, cond); - LLVMValueRef scalar; - LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), ""); - scalar = lp_build_pointer_get(builder, ptr2, loop_index); - - temp_res = LLVMBuildLoad(builder, result, ""); - temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); - LLVMBuildStore(builder, temp_res, result); - lp_build_else(&ifthen); - temp_res = LLVMBuildLoad(builder, result, ""); - LLVMValueRef zero; - if (bit_size == 64) - zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0); - else if (bit_size == 16) - zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0); - else if (bit_size == 8) - zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0); - else - zero = lp_build_const_int32(gallivm, 0); - temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, ""); - LLVMBuildStore(builder, temp_res, result); - lp_build_endif(&ifthen); - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), - NULL, LLVMIntUGE); - outval[c] = LLVMBuildLoad(gallivm->builder, result, ""); + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8))); + + outval[c] = lp_build_masked_gather(gallivm, load_bld->type.length, bit_size, + load_bld->vec_type, + lp_vec_add_offset_ptr(bld_base, bit_size, + scratch_ptr_vec, + chan_offset), + exec_mask); + outval[c] = LLVMBuildBitCast(builder, outval[c], load_bld->vec_type, ""); } } @@ -2358,53 +2770,143 @@ emit_store_scratch(struct lp_build_nir_context *bld_base, struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; struct lp_build_context *uint_bld = &bld_base->uint_bld; struct lp_build_context *store_bld; - LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);; - uint32_t shift_val = bit_size_to_shift_size(bit_size); + LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size); + LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm, + LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length), + bld->scratch_ptr); store_bld = get_int_bld(bld_base, true, bit_size); LLVMValueRef exec_mask = mask_vec(bld_base); offset = lp_build_add(uint_bld, offset, thread_offsets); - offset = lp_build_shr_imm(uint_bld, offset, shift_val); for (unsigned c = 0; c < nc; c++) { if (!(writemask & (1u << c))) continue; LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, ""); - LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c)); - struct lp_build_loop_state loop_state; - lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8))); - LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, - loop_state.counter, ""); - value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, ""); + val = LLVMBuildBitCast(builder, val, store_bld->vec_type, ""); - struct lp_build_if_state ifthen; - LLVMValueRef cond; + lp_build_masked_scatter(gallivm, store_bld->type.length, bit_size, + lp_vec_add_offset_ptr(bld_base, bit_size, + scratch_ptr_vec, chan_offset), + val, exec_mask); + } +} + +static void +emit_clock(struct lp_build_nir_context *bld_base, + LLVMValueRef dst[4]) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = get_int_bld(bld_base, true, 32); - loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index, - loop_state.counter, ""); + lp_init_clock_hook(gallivm); - cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); - cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); - lp_build_if(&ifthen, gallivm, cond); + LLVMTypeRef get_time_type = LLVMFunctionType(LLVMInt64TypeInContext(gallivm->context), NULL, 0, 1); + LLVMValueRef result = LLVMBuildCall2(builder, get_time_type, gallivm->get_time_hook, NULL, 0, ""); - LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), ""); - lp_build_pointer_set(builder, ptr2, loop_index, value_ptr); + LLVMValueRef hi = LLVMBuildShl(builder, result, lp_build_const_int64(gallivm, 32), ""); + hi = LLVMBuildTrunc(builder, hi, uint_bld->elem_type, ""); + LLVMValueRef lo = LLVMBuildTrunc(builder, result, uint_bld->elem_type, ""); + dst[0] = lp_build_broadcast_scalar(uint_bld, lo); + dst[1] = lp_build_broadcast_scalar(uint_bld, hi); +} - lp_build_endif(&ifthen); - lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), - NULL, LLVMIntUGE); +LLVMTypeRef +lp_build_cs_func_call_context(struct gallivm_state *gallivm, int length, + LLVMTypeRef context_type, LLVMTypeRef resources_type) +{ + LLVMTypeRef args[LP_NIR_CALL_CONTEXT_MAX_ARGS]; + + args[LP_NIR_CALL_CONTEXT_CONTEXT] = LLVMPointerType(context_type, 0); + args[LP_NIR_CALL_CONTEXT_RESOURCES] = LLVMPointerType(resources_type, 0); + args[LP_NIR_CALL_CONTEXT_SHARED] = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); /* shared_ptr */ + args[LP_NIR_CALL_CONTEXT_SCRATCH] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* scratch ptr */ + args[LP_NIR_CALL_CONTEXT_WORK_DIM] = LLVMInt32TypeInContext(gallivm->context); /* work_dim */ + args[LP_NIR_CALL_CONTEXT_THREAD_ID_0] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[0] */ + args[LP_NIR_CALL_CONTEXT_THREAD_ID_1] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[1] */ + args[LP_NIR_CALL_CONTEXT_THREAD_ID_2] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[2] */ + args[LP_NIR_CALL_CONTEXT_BLOCK_ID_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[0] */ + args[LP_NIR_CALL_CONTEXT_BLOCK_ID_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[1] */ + args[LP_NIR_CALL_CONTEXT_BLOCK_ID_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[2] */ + + args[LP_NIR_CALL_CONTEXT_GRID_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[0] */ + args[LP_NIR_CALL_CONTEXT_GRID_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[1] */ + args[LP_NIR_CALL_CONTEXT_GRID_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[2] */ + args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[0] */ + args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[1] */ + args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[2] */ + + LLVMTypeRef stype = LLVMStructTypeInContext(gallivm->context, args, LP_NIR_CALL_CONTEXT_MAX_ARGS, 0); + return stype; +} + +static void +build_call_context(struct lp_build_nir_soa_context *bld) +{ + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + bld->call_context_ptr = lp_build_alloca(gallivm, bld->call_context_type, "callcontext"); + LLVMValueRef call_context = LLVMGetUndef(bld->call_context_type); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->context_ptr, LP_NIR_CALL_CONTEXT_CONTEXT, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->resources_ptr, LP_NIR_CALL_CONTEXT_RESOURCES, ""); + if (bld->shared_ptr) { + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->shared_ptr, LP_NIR_CALL_CONTEXT_SHARED, ""); + } else { + call_context = LLVMBuildInsertValue(gallivm->builder, call_context, + LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)), + LP_NIR_CALL_CONTEXT_SHARED, ""); + } + if (bld->scratch_ptr) { + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->scratch_ptr, LP_NIR_CALL_CONTEXT_SCRATCH, ""); + } else { + call_context = LLVMBuildInsertValue(gallivm->builder, call_context, + LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)), + LP_NIR_CALL_CONTEXT_SCRATCH, ""); } + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.work_dim, LP_NIR_CALL_CONTEXT_WORK_DIM, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.thread_id[0], LP_NIR_CALL_CONTEXT_THREAD_ID_0, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.thread_id[1], LP_NIR_CALL_CONTEXT_THREAD_ID_1, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.thread_id[2], LP_NIR_CALL_CONTEXT_THREAD_ID_2, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.block_id[0], LP_NIR_CALL_CONTEXT_BLOCK_ID_0, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.block_id[1], LP_NIR_CALL_CONTEXT_BLOCK_ID_1, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.block_id[2], LP_NIR_CALL_CONTEXT_BLOCK_ID_2, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.grid_size[0], LP_NIR_CALL_CONTEXT_GRID_SIZE_0, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.grid_size[1], LP_NIR_CALL_CONTEXT_GRID_SIZE_1, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.grid_size[2], LP_NIR_CALL_CONTEXT_GRID_SIZE_2, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.block_size[0], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.block_size[1], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1, ""); + call_context = LLVMBuildInsertValue(gallivm->builder, + call_context, bld->system_values.block_size[2], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2, ""); + LLVMBuildStore(gallivm->builder, call_context, bld->call_context_ptr); } -void lp_build_nir_soa(struct gallivm_state *gallivm, - struct nir_shader *shader, - const struct lp_build_tgsi_params *params, - LLVMValueRef (*outputs)[4]) +void lp_build_nir_soa_func(struct gallivm_state *gallivm, + struct nir_shader *shader, + nir_function_impl *impl, + const struct lp_build_tgsi_params *params, + LLVMValueRef (*outputs)[4]) { struct lp_build_nir_soa_context bld; - struct lp_type type = params->type; + const struct lp_type type = params->type; struct lp_type res_type; assert(type.length <= LP_MAX_VECTOR_LENGTH); @@ -2502,37 +3004,51 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, bld.bld_base.elect = emit_elect; bld.bld_base.reduce = emit_reduce; bld.bld_base.ballot = emit_ballot; +#if LLVM_VERSION_MAJOR >= 10 + bld.bld_base.shuffle = emit_shuffle; +#endif bld.bld_base.read_invocation = emit_read_invocation; bld.bld_base.helper_invocation = emit_helper_invocation; bld.bld_base.interp_at = emit_interp_at; + bld.bld_base.call = emit_call; bld.bld_base.load_scratch = emit_load_scratch; bld.bld_base.store_scratch = emit_store_scratch; + bld.bld_base.load_const = emit_load_const; + bld.bld_base.clock = emit_clock; + bld.bld_base.set_vertex_and_primitive_count = emit_set_vertex_and_primitive_count; + bld.bld_base.launch_mesh_workgroups = emit_launch_mesh_workgroups; + bld.bld_base.fns = params->fns; + bld.bld_base.func = params->current_func; bld.mask = params->mask; bld.inputs = params->inputs; bld.outputs = outputs; bld.consts_ptr = params->consts_ptr; - bld.const_sizes_ptr = params->const_sizes_ptr; bld.ssbo_ptr = params->ssbo_ptr; - bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr; bld.sampler = params->sampler; -// bld.bld_base.info = params->info; + bld.context_type = params->context_type; bld.context_ptr = params->context_ptr; + bld.resources_type = params->resources_type; + bld.resources_ptr = params->resources_ptr; + bld.thread_data_type = params->thread_data_type; bld.thread_data_ptr = params->thread_data_ptr; bld.bld_base.aniso_filter_table = params->aniso_filter_table; bld.image = params->image; bld.shared_ptr = params->shared_ptr; + bld.payload_ptr = params->payload_ptr; bld.coro = params->coro; bld.kernel_args_ptr = params->kernel_args; + bld.num_inputs = params->num_inputs; bld.indirects = 0; - if (params->info->indirect_files & (1 << TGSI_FILE_INPUT)) + if (shader->info.inputs_read_indirectly) bld.indirects |= nir_var_shader_in; bld.gs_iface = params->gs_iface; bld.tcs_iface = params->tcs_iface; bld.tes_iface = params->tes_iface; bld.fs_iface = params->fs_iface; + bld.mesh_iface = params->mesh_iface; if (bld.gs_iface) { struct lp_build_context *uint_bld = &bld.bld_base.uint_bld; @@ -2550,19 +3066,31 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, } lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld); - bld.system_values = *params->system_values; + if (params->system_values) + bld.system_values = *params->system_values; bld.bld_base.shader = shader; - if (shader->scratch_size) { + bld.scratch_size = ALIGN(shader->scratch_size, 8); + if (params->scratch_ptr) + bld.scratch_ptr = params->scratch_ptr; + else if (shader->scratch_size) { bld.scratch_ptr = lp_build_array_alloca(gallivm, LLVMInt8TypeInContext(gallivm->context), - lp_build_const_int32(gallivm, shader->scratch_size * type.length), + lp_build_const_int32(gallivm, bld.scratch_size * type.length), "scratch"); } - bld.scratch_size = shader->scratch_size; + + if (!exec_list_is_singular(&shader->functions)) { + bld.call_context_type = lp_build_cs_func_call_context(gallivm, type.length, bld.context_type, bld.resources_type); + if (!params->call_context_ptr) { + build_call_context(&bld); + } else + bld.call_context_ptr = params->call_context_ptr; + } + emit_prologue(&bld); - lp_build_nir_llvm(&bld.bld_base, shader); + lp_build_nir_llvm(&bld.bld_base, shader, impl); if (bld.gs_iface) { LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder; @@ -2573,10 +3101,10 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i); total_emitted_vertices_vec = - LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], ""); + LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.total_emitted_vertices_vec_ptr[i], ""); emitted_prims_vec = - LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], ""); + LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.emitted_prims_vec_ptr[i], ""); bld.gs_iface->gs_epilogue(bld.gs_iface, total_emitted_vertices_vec, emitted_prims_vec, i); @@ -2584,3 +3112,14 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, } lp_exec_mask_fini(&bld.exec_mask); } + +void lp_build_nir_soa(struct gallivm_state *gallivm, + struct nir_shader *shader, + const struct lp_build_tgsi_params *params, + LLVMValueRef (*outputs)[4]) +{ + lp_build_nir_prepasses(shader); + lp_build_nir_soa_func(gallivm, shader, + nir_shader_get_entrypoint(shader), + params, outputs); +} |