summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2018-12-17 15:37:05 +0000
committerEmil Velikov <emil.l.velikov@gmail.com>2019-01-11 20:35:36 +0000
commitf5e752c4152b9924d780e60f73c7afd36c77f5f5 (patch)
tree986f8df0837ee4402727ab485af9d0b328b9b085
parent2a9073301fc9c5ee2ce735f5345bb454bd5bebb7 (diff)
ac: split 16-bit ssbo loads that may not be dword aligned
This ends up refactoring visit_load_buffer() a little. Fixes: 7e7ee826982 ('ac: add support for 16bit buffer loads') Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108114 Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (cherry picked from commit bde9f482de69528db5ccf5dd6bbfd8359adfbb19) Note: this is a backport of the above commit provided by Rhys.
-rw-r--r--src/amd/common/ac_llvm_build.c8
-rw-r--r--src/amd/common/ac_nir_to_llvm.c80
2 files changed, 44 insertions, 44 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1392ec0f238..8953da7f18d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2882,9 +2882,11 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
if (count == num_components)
return value;
- LLVMValueRef masks[] = {
- ctx->i32_0, ctx->i32_1,
- LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
+ LLVMValueRef masks[MAX2(count, 2)];
+ masks[0] = ctx->i32_0;
+ masks[1] = ctx->i32_1;
+ for (unsigned i = 2; i < count; i++)
+ masks[i] = LLVMConstInt(ctx->i32, i, false);
if (count == 1)
return LLVMBuildExtractElement(ctx->builder, value, masks[0],
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 836f518e047..827cb5d85a8 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1622,37 +1622,45 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
- LLVMValueRef results[2];
- int load_bytes;
int elem_size_bytes = instr->dest.ssa.bit_size / 8;
int num_components = instr->num_components;
- int num_bytes = num_components * elem_size_bytes;
enum gl_access_qualifier access = nir_intrinsic_access(instr);
LLVMValueRef glc = ctx->ac.i1false;
if (access & (ACCESS_VOLATILE | ACCESS_COHERENT))
glc = ctx->ac.i1true;
- for (int i = 0; i < num_bytes; i += load_bytes) {
- load_bytes = MIN2(num_bytes - i, 16);
- const char *load_name;
- LLVMTypeRef data_type;
- LLVMValueRef offset = get_src(ctx, instr->src[1]);
- LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i, false);
- LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[0]), false);
- LLVMValueRef vindex = ctx->ac.i32_0;
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
+ get_src(ctx, instr->src[0]), false);
+ LLVMValueRef vindex = ctx->ac.i32_0;
+
+ LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
+ LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
- int idx = i ? 1 : 0;
+ LLVMValueRef results[4];
+ for (int i = 0; i < num_components;) {
+ int num_elems = num_components - i;
+ if (elem_size_bytes < 4)
+ num_elems = 1;
+ if (num_elems * elem_size_bytes > 16)
+ num_elems = 16 / elem_size_bytes;
+ int load_bytes = num_elems * elem_size_bytes;
+
+ LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
+
+ LLVMValueRef ret;
if (load_bytes == 2) {
- results[idx] = ac_build_tbuffer_load_short(&ctx->ac,
- rsrc,
- vindex,
- offset,
- ctx->ac.i32_0,
- immoffset,
- glc);
+ ret = ac_build_tbuffer_load_short(&ctx->ac,
+ rsrc,
+ vindex,
+ offset,
+ ctx->ac.i32_0,
+ immoffset,
+ glc);
} else {
+ const char *load_name;
+ LLVMTypeRef data_type;
switch (load_bytes) {
case 16:
case 12:
@@ -1678,33 +1686,23 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
glc,
ctx->ac.i1false,
};
- results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
- unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes;
- LLVMTypeRef resTy = LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), num_elems);
- results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, "");
+ ret = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
}
- }
- assume(results[0]);
- LLVMValueRef ret = results[0];
- if (num_bytes > 16 || num_components == 3) {
- LLVMValueRef masks[] = {
- LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
- LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
- };
+ LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
+ ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
- if (num_bytes > 16 && num_components == 3) {
- /* we end up with a v2i64 and i64 but shuffle fails on that */
- results[1] = ac_build_expand(&ctx->ac, results[1], 1, 2);
- }
+ LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
- LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
- ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
- results[num_bytes > 16 ? 1 : 0], swizzle, "");
+ for (unsigned j = 0; j < num_elems; j++) {
+ results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
+ }
+ i += num_elems;
}
- return LLVMBuildBitCast(ctx->ac.builder, ret,
- get_def_type(ctx, &instr->dest.ssa), "");
+ return ac_build_gather_values(&ctx->ac, results, num_components);
}
static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,