summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Northover <tnorthover@apple.com>2014-10-20 21:28:41 +0000
committerTim Northover <tnorthover@apple.com>2014-10-20 21:28:41 +0000
commit4e399f4500051ebc4dce6f9a54a3786967e99eca (patch)
tree50a8a0c4006118fa93c08f8937e9feeb63971480
parent2d6aee13e57125f6688ecc79dce9ee48d806ba19 (diff)
ARM: rework Thumb1 frame index rewriting
The previous code had a few problems, motivating the choices here. 1. It could create instructions clobbering CPSR, but the incoming MachineInstr didn't reflect this. A potential source of corruption. This is why the patch has a new PseudoInst for before lowering. 2. Similarly, there was some code to handle the incoming instruction not being ARMCC::AL, but this would have caused massive problems if it was actually invoked when a complex offset needing more than one instruction was requested. 3. It wasn't designed to handle unaligned pointers (or offsets). These should probably be minimised anyway, but the code needs to deal with them properly regardless. 4. It had some rather dubious ad-hoc code to avoid calling emitThumbRegPlusImmediate, a function which should be designed to do precisely this job. We seem to cover the common cases correctly now, and hopefully can enhance emitThumbRegPlusImmediate to handle any extra optimisations we need to add in future. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220236 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp8
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp5
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp6
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td8
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp113
-rw-r--r--test/CodeGen/ARM/thumb1-varalloc.ll76
6 files changed, 102 insertions, 114 deletions
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b01f2a75bc1..6dc0493c797 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -585,7 +585,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
int64_t Offset) const {
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
- (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri);
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
@@ -598,11 +598,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
- MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
- .addFrameIndex(FrameIdx).addImm(Offset));
+ MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
if (!AFI->isThumb1OnlyFunction())
- AddDefaultCC(MIB);
+ AddDefaultCC(AddDefaultPred(MIB));
}
void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 19bc2cab668..2d546dfd198 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2497,9 +2497,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
if (Subtarget->isThumb1Only()) {
- SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops);
+ return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index d63557a34ab..7df13834055 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -6571,9 +6571,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3, RegState::Kill));
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
- .addFrameIndex(FI)
- .addImm(36)); // &jbuf[1] :: pc
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
+ .addFrameIndex(FI)
+ .addImm(36); // &jbuf[1] :: pc
AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
.addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg5, RegState::Kill)
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 2c873ac9954..a86784432ad 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -360,6 +360,14 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
let DecoderMethod = "DecodeThumbAddSpecialReg";
}
+// Thumb1 frame lowering is rather fragile, we hope to be able to use
+// tADDrSPi, but we may need to insert a sequence that clobbers CPSR.
+def tADDframe : PseudoInst<(outs tGPR:$dst), (ins i32imm:$base, i32imm:$offset),
+ NoItinerary, []>,
+ Requires<[IsThumb, IsThumb1Only]> {
+ let Defs = [CPSR];
+}
+
// ADD sp, sp, #<imm7>
def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
IIC_iALUi, "add", "\t$Rdn, $imm", []>,
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 291e97b0708..690db9cf2ae 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -196,6 +196,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
Bytes &= ~3;
ExtraOpc = ARM::tADDi3;
}
+ DstNotEqBase = true;
NumBits = 8;
Scale = 4;
Opc = ARM::tADDrSPi;
@@ -241,6 +242,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
.setMIFlags(MIFlags));
AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
+ } else if (isARMLowRegister(DestReg) && BaseReg == ARM::SP && Bytes > 0) {
+ unsigned ThisVal = std::min(1020U, Bytes / 4 * 4);
+ Bytes -= ThisVal;
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(ThisVal / 4))
+ .setMIFlags(MIFlags);
} else {
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
.addReg(BaseReg, RegState::Kill))
@@ -294,32 +301,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
}
}
-/// emitThumbConstant - Emit a series of instructions to materialize a
-/// constant.
-static void emitThumbConstant(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, int Imm,
- const TargetInstrInfo &TII,
- const Thumb1RegisterInfo& MRI,
- DebugLoc dl) {
- bool isSub = Imm < 0;
- if (isSub) Imm = -Imm;
-
- int Chunk = (1 << 8) - 1;
- int ThisVal = (Imm > Chunk) ? Chunk : Imm;
- Imm -= ThisVal;
- AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8),
- DestReg))
- .addImm(ThisVal));
- if (Imm > 0)
- emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
- if (isSub) {
- const MCInstrDesc &MCID = TII.get(ARM::tRSB);
- AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
- .addReg(DestReg, RegState::Kill));
- }
-}
-
static void removeOperands(MachineInstr &MI, unsigned i) {
unsigned Op = i;
for (unsigned e = MI.getNumOperands(); i != e; ++i)
@@ -352,85 +333,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- if (Opcode == ARM::tADDrSPi) {
+ if (Opcode == ARM::tADDframe) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
-
- // Can't use tADDrSPi if it's based off the frame pointer.
- unsigned NumBits = 0;
- unsigned Scale = 1;
- if (FrameReg != ARM::SP) {
- Opcode = ARM::tADDi3;
- NumBits = 3;
- } else {
- NumBits = 8;
- Scale = 4;
- }
-
- unsigned PredReg;
- if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
- // Turn it into a move.
- MI.setDesc(TII.get(ARM::tMOVr));
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- // Remove offset
- MI.RemoveOperand(FrameRegIdx+1);
- return true;
- }
-
- // Common case: small offset, fits into instruction.
- unsigned Mask = (1 << NumBits) - 1;
- if (Offset % Scale == 0 && ((Offset / Scale) & ~Mask) == 0) {
- // Replace the FrameIndex with sp / fp
- if (Opcode == ARM::tADDi3) {
- MI.setDesc(TII.get(Opcode));
- removeOperands(MI, FrameRegIdx);
- AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
- .addImm(Offset / Scale));
- } else {
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
- }
- return true;
- }
-
unsigned DestReg = MI.getOperand(0).getReg();
- unsigned Bytes = (Offset > 0) ? Offset : -Offset;
- unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
- // MI would expand into a large number of instructions. Don't try to
- // simplify the immediate.
- if (NumMIs > 2) {
- emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
- *this);
- MBB.erase(II);
- return true;
- }
- if (Offset > 0) {
- // Translate r0 = add sp, imm to
- // r0 = add sp, 255*4
- // r0 = add r0, (imm - 255*4)
- if (Opcode == ARM::tADDi3) {
- MI.setDesc(TII.get(Opcode));
- removeOperands(MI, FrameRegIdx);
- AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
- } else {
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
- }
- Offset = (Offset - Mask * Scale);
- MachineBasicBlock::iterator NII = std::next(II);
- MI.getOperand(0).setIsDead(false);
- emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
- *this);
- } else {
- // Translate r0 = add sp, -imm to
- // r0 = -imm (this is then translated into a series of instructions)
- // r0 = add r0, sp
- emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
-
- MI.setDesc(TII.get(ARM::tADDhirr));
- MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
- MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
- }
+ emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+ *this);
+ MBB.erase(II);
return true;
} else {
if (AddrMode != ARMII::AddrModeT1_s)
diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll
index e07e8aab77a..19c5dbeceab 100644
--- a/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -1,13 +1,13 @@
; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
-; rdar://8819685
@__bar = external hidden global i8*
@__baz = external hidden global i8*
+; rdar://8819685
define i8* @_foo() {
entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
%size = alloca i32, align 4
%0 = load i8** @__bar, align 4
@@ -40,3 +40,75 @@ bb3:
declare noalias i8* @strdup(i8* nocapture) nounwind
declare i32 @_called_func(i8*, i32*) nounwind
+
+; Variable ending up at unaligned offset from sp (i.e. not a multiple of 4)
+define void @test_local_var_addr() {
+; CHECK-LABEL: test_local_var_addr:
+
+ %addr1 = alloca i8
+ %addr2 = alloca i8
+
+; CHECK: mov r0, sp
+; CHECK: adds r0, r0, #{{[0-9]+}}
+; CHECK: blx _take_ptr
+ call void @take_ptr(i8* %addr1)
+
+; CHECK: mov r0, sp
+; CHECK: adds r0, r0, #{{[0-9]+}}
+; CHECK: blx _take_ptr
+ call void @take_ptr(i8* %addr2)
+
+ ret void
+}
+
+; Simple variable ending up *at* sp.
+define void @test_simple_var() {
+; CHECK-LABEL: test_simple_var:
+
+ %addr32 = alloca i32
+ %addr8 = bitcast i32* %addr32 to i8*
+
+; CHECK: mov r0, sp
+; CHECK-NOT: adds r0
+; CHECK: blx _take_ptr
+ call void @take_ptr(i8* %addr8)
+ ret void
+}
+
+; Simple variable ending up at aligned offset from sp.
+define void @test_local_var_addr_aligned() {
+; CHECK-LABEL: test_local_var_addr_aligned:
+
+ %addr1.32 = alloca i32
+ %addr1 = bitcast i32* %addr1.32 to i8*
+ %addr2.32 = alloca i32
+ %addr2 = bitcast i32* %addr2.32 to i8*
+
+; CHECK: add r0, sp, #{{[0-9]+}}
+; CHECK: blx _take_ptr
+ call void @take_ptr(i8* %addr1)
+
+; CHECK: mov r0, sp
+; CHECK-NOT: add r0
+; CHECK: blx _take_ptr
+ call void @take_ptr(i8* %addr2)
+
+ ret void
+}
+
+; Simple variable ending up at aligned offset from sp.
+define void @test_local_var_big_offset() {
+; CHECK-LABEL: test_local_var_big_offset:
+ %addr1.32 = alloca i32, i32 257
+ %addr1 = bitcast i32* %addr1.32 to i8*
+ %addr2.32 = alloca i32, i32 257
+
+; CHECK: add [[RTMP:r[0-9]+]], sp, #1020
+; CHECL: add r0, [[RTMP]], #8
+; CHECK: blx _take_ptr
+ call void @take_ptr(i8* %addr1)
+
+ ret void
+}
+
+declare void @take_ptr(i8*)