summary refs log tree commit diff
diff options
context:
space:
mode:
author Vincent Lejeune <vljn@ovi.com> 2013-10-13 17:56:10 +0000
committer Vincent Lejeune <vljn@ovi.com> 2013-10-13 17:56:10 +0000
commit f2b3a569ae25dbba264cef93602b4147d2a723d6 (patch)
tree 857095c64034e715e31ce2dcd5e14b7b1af4c5e8
parent 91ec4b0cac7a7476a9d30d6f1adbf218ee6673a0 (diff)
R600: Use masked read sel for texture instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192554 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/R600/R600ISelLowering.cpp 5
-rw-r--r-- test/CodeGen/R600/swizzle-export.ll 15
2 files changed, 12 insertions, 8 deletions
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 81a28be104a..3c2e3888e08 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -1379,6 +1379,11 @@ CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
};
for (unsigned i = 0; i < 4; i++) {
+ if (NewBldVec[i].getOpcode() == ISD::UNDEF)
+ // We mask the write here to teach later passes that the ith element of this
+ // vector is undef. Thus we can use it to reduce 128-bit register usage,
+ // break false dependencies and additionally make the assembly easier to read.
+ RemapSwizzle[i] = 7; // SEL_MASK_WRITE
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
if (C->isZero()) {
RemapSwizzle[i] = 4; // SEL_0
diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll
index 02fe13a720e..9a58f667f0d 100644
--- a/test/CodeGen/R600/swizzle-export.ll
+++ b/test/CodeGen/R600/swizzle-export.ll
@@ -93,6 +93,7 @@ main_body:
}
; EG-CHECK: @main2
+; EG-CHECK: T{{[0-9]+}}.XY__
; EG-CHECK: T{{[0-9]+}}.YXZ0
define void @main2() #0 {
@@ -110,14 +111,12 @@ main_body:
%10 = extractelement <4 x float> %9, i32 1
%11 = insertelement <4 x float> undef, float %0, i32 0
%12 = insertelement <4 x float> %11, float %1, i32 1
- %13 = insertelement <4 x float> %12, float %2, i32 2
- %14 = insertelement <4 x float> %13, float %3, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %14, i32 60, i32 1)
- %15 = insertelement <4 x float> undef, float %6, i32 0
- %16 = insertelement <4 x float> %15, float %8, i32 1
- %17 = insertelement <4 x float> %16, float %10, i32 2
- %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
+ call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1)
+ %13 = insertelement <4 x float> undef, float %6, i32 0
+ %14 = insertelement <4 x float> %13, float %8, i32 1
+ %15 = insertelement <4 x float> %14, float %10, i32 2
+ %16 = insertelement <4 x float> %15, float 0.000000e+00, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %16, i32 0, i32 2)
ret void
}