summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Samuel Pitoiset <samuel.pitoiset@gmail.com> 2020-01-27 13:42:11 +0100
committer: Dylan Baker <dylan@pnwbakers.com> 2020-03-13 09:23:34 -0700
commit: b35d45f21abcb9f4c0c4935015ccff7f4eb02a9c (patch)
tree: a8757c2b4dc7a1fdc17a4621eca360513e2744db
parent: 1b2c982166beaa5c165ced0c23318835711df508 (diff)
ac/llvm: add missing optimization barrier for 64-bit readlanes
Otherwise, LLVM optimizes it but it's actually incorrect.

Fixes: 0f45d4dc2b1 ("ac: add ac_build_readlane without optimization barrier")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3585>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3585>
(cherry picked from commit cc320ef9af6b84b6a1f275261b071d05c0ee6a62)
-rw-r--r-- .pick_status.json 2
-rw-r--r-- src/amd/llvm/ac_llvm_build.c 67
2 files changed, 41 insertions, 28 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 2072a4b5d9a..ba880dae2be 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -607,7 +607,7 @@
"description": "ac/llvm: add missing optimization barrier for 64-bit readlanes",
"nominated": true,
"nomination_type": 1,
- "resolution": 0,
+ "resolution": 1,
"master_sha": null,
"because_sha": "0f45d4dc2b15e137346e1e3f064a24302e1c9048"
},
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index a131c2a10cd..a1a9a453e4e 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -3611,11 +3611,15 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
}
static LLVMValueRef
-_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
+ LLVMValueRef lane, bool with_opt_barrier)
{
LLVMTypeRef type = LLVMTypeOf(src);
LLVMValueRef result;
+ if (with_opt_barrier)
+ ac_build_optimization_barrier(ctx, &src);
+
src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
if (lane)
lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, "");
@@ -3630,20 +3634,13 @@ _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef l
return LLVMBuildTrunc(ctx->builder, result, type, "");
}
-/**
- * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
- *
- * The optimization barrier is not needed if the value is the same in all lanes
- * or if this is called in the outermost block.
- *
- * @param ctx
- * @param src
- * @param lane - id of the lane or NULL for the first active lane
- * @return value of the lane
- */
-LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
- LLVMValueRef src, LLVMValueRef lane)
+static LLVMValueRef
+ac_build_readlane_common(struct ac_llvm_context *ctx,
+ LLVMValueRef src, LLVMValueRef lane,
+ bool with_opt_barrier)
{
+ LLVMTypeRef src_type = LLVMTypeOf(src);
+ src = ac_to_integer(ctx, src);
unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
LLVMValueRef ret;
@@ -3654,32 +3651,48 @@ LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
LLVMBuildBitCast(ctx->builder, src, vec_type, "");
ret = LLVMGetUndef(vec_type);
for (unsigned i = 0; i < bits / 32; i++) {
+ LLVMValueRef ret_comp;
+
src = LLVMBuildExtractElement(ctx->builder, src_vector,
LLVMConstInt(ctx->i32, i, 0), "");
- LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, lane);
+
+ ret_comp = _ac_build_readlane(ctx, src, lane,
+ with_opt_barrier);
+
ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp,
LLVMConstInt(ctx->i32, i, 0), "");
}
} else {
- ret = _ac_build_readlane(ctx, src, lane);
+ ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
}
- return ret;
+ if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
+ return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
+ return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
-LLVMValueRef
-ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+/**
+ * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
+ *
+ * The optimization barrier is not needed if the value is the same in all lanes
+ * or if this is called in the outermost block.
+ *
+ * @param ctx
+ * @param src
+ * @param lane - id of the lane or NULL for the first active lane
+ * @return value of the lane
+ */
+LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
+ LLVMValueRef src, LLVMValueRef lane)
{
- LLVMTypeRef src_type = LLVMTypeOf(src);
- src = ac_to_integer(ctx, src);
- LLVMValueRef ret;
+ return ac_build_readlane_common(ctx, src, lane, false);
+}
- ac_build_optimization_barrier(ctx, &src);
- ret = ac_build_readlane_no_opt_barrier(ctx, src, lane);
- if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
- return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
- return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+LLVMValueRef
+ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+{
+ return ac_build_readlane_common(ctx, src, lane, true);
}
LLVMValueRef