diff options
author | Tim Northover <tnorthover@apple.com> | 2014-05-27 10:43:38 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2014-05-27 10:43:38 +0000 |
commit | e43c5023fe0c1de8d5c1ff13d07ff710b196d121 (patch) | |
tree | 9ed856584ebadab0711b006fd43537f23e3da0cd | |
parent | ae85c73d4af578822bb957d94d6385d73ff57b3b (diff) |
ARM: teach AAPCS-VFP to deal with Cortex-M4.
Cortex-M4 only has single-precision floating-point support, so any LLVM
"double" type will already have been split into two i32s by the time the
calling-convention code sees it. Fortunately, the consecutive-register
framework turns out to be precisely what's needed to reconstruct the double
and follow AAPCS-VFP correctly!
rdar://problem/17012966
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209650 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 19 | ||||
-rw-r--r-- | lib/Target/ARM/ARMCallingConv.h | 17 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 17 | ||||
-rw-r--r-- | test/CodeGen/ARM/aapcs-hfa-code.ll | 111 |
4 files changed, 143 insertions, 21 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c181046ba23..070e929fce7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7176,11 +7176,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } if (Args[i].isNest) Flags.setNest(); - if (NeedsRegBlock) { + if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - if (Value == NumValues - 1) - Flags.setInConsecutiveRegsLast(); - } Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7226,6 +7223,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (j != 0) MyFlags.Flags.setOrigAlign(1); + // Only mark the end at the last register of the last value. + if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1) + MyFlags.Flags.setInConsecutiveRegsLast(); + CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } @@ -7412,11 +7413,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); - if (NeedsRegBlock) { + if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - if (Value == NumValues - 1) - Flags.setInConsecutiveRegsLast(); - } Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); @@ -7429,6 +7427,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // if it isn't first piece, alignment must be 1 else if (i > 0) MyFlags.Flags.setOrigAlign(1); + + // Only mark the end at the last register of the last value. 
+ if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1) + MyFlags.Flags.setInConsecutiveRegsLast(); + Ins.push_back(MyFlags); } PartBase += VT.getStoreSize(); diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 8e0fd893528..dc41c1c14bb 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -177,9 +177,8 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs(); - // AAPCS HFAs must have 1-4 elements, all of the same type - assert(PendingHAMembers.size() < 4); + assert(PendingHAMembers.size() < 8); if (PendingHAMembers.size() > 0) assert(PendingHAMembers[0].getLocVT() == LocVT); @@ -189,7 +188,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); if (ArgFlags.isInConsecutiveRegsLast()) { - assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 && + assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 && "Homogeneous aggregates must have between 1 and 4 members"); // Try to allocate a contiguous block of registers, each of the correct @@ -197,6 +196,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, const uint16_t *RegList; unsigned NumRegs; switch (LocVT.SimpleTy) { + case MVT::i32: case MVT::f32: RegList = SRegList; NumRegs = 16; @@ -235,11 +235,20 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State.AllocateReg(SRegList[regNo]); unsigned Size = LocVT.getSizeInBits() / 8; - unsigned Align = LocVT.SimpleTy == MVT::v2f64 ? 8 : Size; + unsigned Align = Size; + + if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) { + // Vectors are always aligned to 8 bytes. 
If we've seen an i32 here + // it's because it's been split from a larger type, also with align 8. + Align = 8; + } for (auto It : PendingHAMembers) { It.convertToMem(State.AllocateStack(Size, Align)); State.addLoc(It); + + // Only the first member needs to be aligned. + Align = 1; } // All pending members have now been allocated diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 5beb752d3a4..00d07e84067 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -10778,14 +10778,13 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, /// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { - if (getEffectiveCallingConv(CallConv, isVarArg) == - CallingConv::ARM_AAPCS_VFP) { - HABaseType Base = HA_UNKNOWN; - uint64_t Members = 0; - bool result = isHomogeneousAggregate(Ty, Base, Members); - DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); - return result; - } else { + if (getEffectiveCallingConv(CallConv, isVarArg) != + CallingConv::ARM_AAPCS_VFP) return false; - } + + HABaseType Base = HA_UNKNOWN; + uint64_t Members = 0; + bool result = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + return result; } diff --git a/test/CodeGen/ARM/aapcs-hfa-code.ll b/test/CodeGen/ARM/aapcs-hfa-code.ll new file mode 100644 index 00000000000..396e83816cc --- /dev/null +++ b/test/CodeGen/ARM/aapcs-hfa-code.ll @@ -0,0 +1,111 @@ +; RUN: llc < %s -mtriple=armv7-linux-gnueabihf -o - | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7em-none-eabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=CHECK-M4F + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define arm_aapcs_vfpcc void @test_1float({ float } %a) { + call arm_aapcs_vfpcc void @test_1float({ float } { 
float 1.0 }) + ret void + +; CHECK-LABEL: test_1float: +; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK: bl test_1float + +; CHECK-M4F-LABEL: test_1float: +; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK-M4F: bl test_1float +} + +define arm_aapcs_vfpcc void @test_2float({ float, float } %a) { + call arm_aapcs_vfpcc void @test_2float({ float, float } { float 1.0, float 2.0 }) + ret void + +; CHECK-LABEL: test_2float: +; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK-DAG: vmov.f32 s1, #2.{{0+}}e+00 +; CHECK: bl test_2float + +; CHECK-M4F-LABEL: test_2float: +; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK-M4F-DAG: vmov.f32 s1, #2.{{0+}}e+00 +; CHECK-M4F: bl test_2float +} + +define arm_aapcs_vfpcc void @test_3float({ float, float, float } %a) { + call arm_aapcs_vfpcc void @test_3float({ float, float, float } { float 1.0, float 2.0, float 3.0 }) + ret void + +; CHECK-LABEL: test_3float: +; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK-DAG: vmov.f32 s1, #2.{{0+}}e+00 +; CHECK-DAG: vmov.f32 s2, #3.{{0+}}e+00 +; CHECK: bl test_3float + +; CHECK-M4F-LABEL: test_3float: +; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK-M4F-DAG: vmov.f32 s1, #2.{{0+}}e+00 +; CHECK-M4F-DAG: vmov.f32 s2, #3.{{0+}}e+00 +; CHECK-M4F: bl test_3float +} + +define arm_aapcs_vfpcc void @test_1double({ double } %a) { +; CHECK-LABEL: test_1double: +; CHECK-DAG: vmov.f64 d0, #1.{{0+}}e+00 +; CHECK: bl test_1double + +; CHECK-M4F-LABEL: test_1double: +; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 +; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F-DAG: vmov s0, [[ONELO]] +; CHECK-M4F-DAG: vmov s1, [[ONEHI]] +; CHECK-M4F: bl test_1double + + call arm_aapcs_vfpcc void @test_1double({ double } { double 1.0 }) + ret void +} + +; Final double argument might be put in s15 & [sp] if we're careless. It should +; go all on the stack. 
+define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3 x float], double %a) { +; CHECK-LABEL: test_1double_nosplit: +; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0 +; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0 +; CHECK-DAG: movt [[ONEHI]], #16368 +; CHECK: strd [[ONELO]], [[ONEHI]], [sp] +; CHECK: bl test_1double_nosplit + +; CHECK-M4F-LABEL: test_1double_nosplit: +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 +; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 +; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F-DAG: str [[ONELO]], [sp] +; CHECK-M4F-DAG: str [[ONEHI]], [sp, #4] +; CHECK-M4F: bl test_1double_nosplit + call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0) + ret void +} + +; Final double argument might go at [sp, #4] if we're careless. Should go at +; [sp, #8] to preserve alignment. +define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double], float, double) { + call arm_aapcs_vfpcc void @test_1double_misaligned([4 x double] undef, [4 x double] undef, float undef, double 1.0) + +; CHECK-LABEL: test_1double_misaligned: +; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0 +; CHECK-DAG: mov r[[BASE:[0-9]+]], sp +; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0 +; CHECK-DAG: movt [[ONEHI]], #16368 +; CHECK-DAG: str [[ONELO]], [r[[BASE]], #8]! +; CHECK-DAG: str [[ONEHI]], [r[[BASE]], #4] + +; CHECK-M4F-LABEL: test_1double_misaligned: +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 +; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 +; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F-DAG: str [[ONELO]], [sp, #8] +; CHECK-M4F-DAG: str [[ONEHI]], [sp, #12] +; CHECK-M4F: bl test_1double_misaligned + + ret void +} |