summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ulrich Weigand <ulrich.weigand@de.ibm.com> 2015-02-18 09:13:27 +0000
committer Ulrich Weigand <ulrich.weigand@de.ibm.com> 2015-02-18 09:13:27 +0000
commit bebd59c74ba55cdda62466f72c5031274c37b93b (patch)
tree c34c3d4c8319dd1a1c1f92fc26d519784f08e7a3
parent 5d27960454e35781341229dc3e539f6a6c37fe23 (diff)
[SystemZ] Support all TLS access models - CodeGen part
The current SystemZ back-end only supports the local-exec TLS access model.
This patch adds all required CodeGen support for the other TLS models,
which means in particular:

- Expand initial-exec TLS accesses by loading TLS offsets from the GOT
  using @indntpoff relocations.
- Expand general-dynamic and local-dynamic accesses by generating the
  appropriate calls to __tls_get_offset. Note that this routine has a
  non-standard ABI and requires loading the GOT pointer into %r12, so the
  patch also adds support for the GLOBAL_OFFSET_TABLE ISD node.
- Add a new platform-specific optimization pass to remove redundant
  __tls_get_offset calls in the local-dynamic model (modeled after the
  corresponding X86 pass).
- Add test cases verifying all access models and optimizations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229654 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/SystemZ/CMakeLists.txt1
-rw-r--r--lib/Target/SystemZ/SystemZ.h1
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp37
-rw-r--r--lib/Target/SystemZ/SystemZConstantPoolValue.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZConstantPoolValue.h8
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp132
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h8
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h7
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td15
-rw-r--r--lib/Target/SystemZ/SystemZLDCleanup.cpp143
-rw-r--r--lib/Target/SystemZ/SystemZMCInstLower.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.h8
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td7
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp4
-rw-r--r--test/CodeGen/SystemZ/tls-01.ll6
-rw-r--r--test/CodeGen/SystemZ/tls-02.ll18
-rw-r--r--test/CodeGen/SystemZ/tls-03.ll23
-rw-r--r--test/CodeGen/SystemZ/tls-04.ll28
-rw-r--r--test/CodeGen/SystemZ/tls-05.ll15
-rw-r--r--test/CodeGen/SystemZ/tls-06.ll17
-rw-r--r--test/CodeGen/SystemZ/tls-07.ll16
21 files changed, 482 insertions, 19 deletions
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 41a614d9d15..60a39129640 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_target(SystemZCodeGen
SystemZISelDAGToDAG.cpp
SystemZISelLowering.cpp
SystemZInstrInfo.cpp
+ SystemZLDCleanup.cpp
SystemZLongBranch.cpp
SystemZMachineFunctionInfo.cpp
SystemZMCInstLower.cpp
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index c8b95b2b2ca..5f17edb51a9 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -111,6 +111,7 @@ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
} // end namespace llvm
#endif
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 8b18bc16e1c..3eb13abfa4e 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -66,6 +66,20 @@ static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) {
.addImm(MI->getOperand(5).getImm());
}
+static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) {
+ StringRef Name = "__tls_get_offset";
+ return MCSymbolRefExpr::Create(Context.GetOrCreateSymbol(Name),
+ MCSymbolRefExpr::VK_PLT,
+ Context);
+}
+
+static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
+ StringRef Name = "_GLOBAL_OFFSET_TABLE_";
+ return MCSymbolRefExpr::Create(Context.GetOrCreateSymbol(Name),
+ MCSymbolRefExpr::VK_None,
+ Context);
+}
+
void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
@@ -95,6 +109,26 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
break;
+ case SystemZ::TLS_GDCALL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(getTLSGetOffset(MF->getContext()))
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSGD));
+ break;
+
+ case SystemZ::TLS_LDCALL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(getTLSGetOffset(MF->getContext()))
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM));
+ break;
+
+ case SystemZ::GOT:
+ LoweredMI = MCInstBuilder(SystemZ::LARL)
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(getGlobalOffsetTable(MF->getContext()));
+ break;
+
case SystemZ::IILF64:
LoweredMI = MCInstBuilder(SystemZ::IILF)
.addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
@@ -172,6 +206,9 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
static MCSymbolRefExpr::VariantKind
getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {
switch (Modifier) {
+ case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD;
+ case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM;
+ case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF;
case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF;
}
llvm_unreachable("Invalid SystemCPModifier!");
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index 19cec219e2d..44ea1d25f08 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -28,6 +28,11 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV,
unsigned SystemZConstantPoolValue::getRelocationInfo() const {
switch (Modifier) {
+ case SystemZCP::TLSGD:
+ case SystemZCP::TLSLDM:
+ case SystemZCP::DTPOFF:
+ // May require a dynamic relocation.
+ return 2;
case SystemZCP::NTPOFF:
// May require a relocation, but the relocations are always resolved
// by the static linker.
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h
index 0bd8c205ea4..e5f1bb18581 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -19,13 +19,17 @@ class GlobalValue;
namespace SystemZCP {
enum SystemZCPModifier {
+ TLSGD,
+ TLSLDM,
+ DTPOFF,
NTPOFF
};
} // end namespace SystemZCP
/// A SystemZ-specific constant pool value. At present, the only
-/// defined constant pool values are offsets of thread-local variables
-/// (written x@NTPOFF).
+/// defined constant pool values are module IDs or offsets of
+/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF,
+/// or x@NTPOFF).
class SystemZConstantPoolValue : public MachineConstantPoolValue {
const GlobalValue *GV;
SystemZCP::SystemZCPModifier Modifier;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 4bcf78656a3..d92ad6a0054 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1830,6 +1830,52 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
return Result;
}
+SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG,
+ unsigned Opcode,
+ SDValue GOTOffset) const {
+ SDLoc DL(Node);
+ EVT PtrVT = getPointerTy();
+ SDValue Chain = DAG.getEntryNode();
+ SDValue Glue;
+
+ // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
+ Glue = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
+ Glue = Chain.getValue(1);
+
+ // The first call operand is the chain and the second is the TLS symbol.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
+ Node->getValueType(0),
+ 0, 0));
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
+ Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallingConv::C);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // Glue the call to the argument copies.
+ Ops.push_back(Glue);
+
+ // Emit the call.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
+ Glue = Chain.getValue(1);
+
+ // Copy the return value from %r2.
+ return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
+}
+
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const {
SDLoc DL(Node);
@@ -1837,9 +1883,6 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
EVT PtrVT = getPointerTy();
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
- if (model != TLSModel::LocalExec)
- llvm_unreachable("only local-exec TLS mode supported");
-
// The high part of the thread pointer is in access register 0.
SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
DAG.getConstant(0, MVT::i32));
@@ -1855,15 +1898,82 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
DAG.getConstant(32, PtrVT));
SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
- // Get the offset of GA from the thread pointer.
- SystemZConstantPoolValue *CPV =
- SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
+ // Get the offset of GA from the thread pointer, based on the TLS model.
+ SDValue Offset;
+ switch (model) {
+ case TLSModel::GeneralDynamic: {
+ // Load the GOT offset of the tls_index (module ID / per-symbol offset).
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+
+ // Call __tls_get_offset to retrieve the offset.
+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
+ break;
+ }
+
+ case TLSModel::LocalDynamic: {
+ // Load the GOT offset of the module ID.
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+
+ // Call __tls_get_offset to retrieve the module base offset.
+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
+
+ // Note: The SystemZLDCleanupPass will remove redundant computations
+ // of the module base offset. Count total number of local-dynamic
+ // accesses to trigger execution of that pass.
+ SystemZMachineFunctionInfo* MFI =
+ DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+ // Add the per-symbol offset.
+ CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
+
+ SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
+ DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ DTPOffset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
- // Force the offset into the constant pool and load it from there.
- SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8);
- SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- CPAddr, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
+ break;
+ }
+
+ case TLSModel::InitialExec: {
+ // Load the offset from the GOT.
+ Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ SystemZII::MO_INDNTPOFF);
+ Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getGOT(),
+ false, false, false, 0);
+ break;
+ }
+
+ case TLSModel::LocalExec: {
+ // Force the offset into the constant pool and load it from there.
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ break;
+ }
+
+ default:
+ llvm_unreachable("Unknown TLS model.");
+ }
// Add the base and offset together.
return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 3922721baf9..fa58b8f553b 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -34,6 +34,11 @@ enum {
CALL,
SIBCALL,
+ // TLS calls. Like regular calls, except operand 1 is the TLS symbol.
+ // (The call target is implicitly __tls_get_offset.)
+ TLS_GDCALL,
+ TLS_LDCALL,
+
// Wraps a TargetGlobalAddress that should be loaded using PC-relative
// accesses (LARL). Operand 0 is the address.
PCREL_WRAPPER,
@@ -258,6 +263,9 @@ private:
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const;
+ SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG, unsigned Opcode,
+ SDValue GOTOffset) const;
SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const;
SDValue lowerBlockAddress(BlockAddressSDNode *Node,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index d2e3f541f80..e711f892c1c 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -56,10 +56,13 @@ static inline unsigned getCompareZeroCCMask(unsigned int Flags) {
// SystemZ MachineOperand target flags.
enum {
// Masks out the bits for the access model.
- MO_SYMBOL_MODIFIER = (1 << 0),
+ MO_SYMBOL_MODIFIER = (3 << 0),
// @GOT (aka @GOTENT)
- MO_GOT = (1 << 0)
+ MO_GOT = (1 << 0),
+
+ // @INDNTPOFF
+ MO_INDNTPOFF = (2 << 0)
};
// Classifies a branch.
enum BranchType {
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 0f752640e7d..a7f774791d4 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -249,6 +249,15 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
}
+// TLS calls. These will be lowered into a call to __tls_get_offset,
+// with an extra relocation specifying the TLS symbol.
+let isCall = 1, Defs = [R14D, CC] in {
+ def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
+ [(z_tls_gdcall tglobaltlsaddr:$I2)]>;
+ def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
+ [(z_tls_ldcall tglobaltlsaddr:$I2)]>;
+}
+
// Define the general form of the call instructions for the asm parser.
// These instructions don't hard-code %r14 as the return address register.
// Allow an optional TLS marker symbol to generate TLS call relocations.
@@ -588,6 +597,12 @@ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
[(set GR64:$R1, pcrel32:$I2)]>;
}
+// Load the Global Offset Table address. This will be lowered into a
+// larl $R1, _GLOBAL_OFFSET_TABLE_
+// instruction.
+def GOT : Alias<6, (outs GR64:$R1), (ins),
+ [(set GR64:$R1, (global_offset_table))]>;
+
//===----------------------------------------------------------------------===//
// Absolute and Negation
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
new file mode 100644
index 00000000000..24165be29ae
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -0,0 +1,143 @@
+//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines multiple accesses to local-dynamic TLS variables so that
+// the TLS base address for the module is only fetched once per execution path
+// through the function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SystemZLDCleanup : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZLDCleanup(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {}
+
+ const char *getPassName() const override {
+ return "SystemZ Local Dynamic TLS Access Clean-up";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg);
+ MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg);
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg);
+
+ const SystemZInstrInfo *TII;
+ MachineFunction *MF;
+};
+
+char SystemZLDCleanup::ID = 0;
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) {
+ return new SystemZLDCleanup(TM);
+}
+
+void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ MF = &F;
+
+ SystemZMachineFunctionInfo* MFI = F.getInfo<SystemZMachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there aren't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+}
+
+// Visit the dominator subtree rooted at Node in pre-order.
+// If TLSBaseAddrReg is non-zero, then use that to replace any
+// TLS_LDCALL instructions. Otherwise, create the register
+// when the first such instruction is seen, and then use it
+// as we encounter more instructions.
+bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node,
+ unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+ switch (I->getOpcode()) {
+ case SystemZ::TLS_LDCALL:
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (auto I = Node->begin(), E = Node->end(); I != E; ++I)
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+
+ return Changed;
+}
+
+// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg,
+// returning the new instruction.
+MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ // Insert a Copy from TLSBaseAddrReg to R2.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SystemZ::R2D)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_LDCALL instruction.
+ I->eraseFromParent();
+
+ return Copy;
+}
+
+// Create a virtual register in *TLSBaseAddrReg, and populate it by
+// inserting a copy instruction after I. Returns the new instruction.
+MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I,
+ unsigned *TLSBaseAddrReg) {
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass);
+
+ // Insert a copy from R2 to TLSBaseAddrReg.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
+ .addReg(SystemZ::R2D);
+
+ return Copy;
+}
+
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp
index df561e2d800..6bb96f1b40f 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -22,6 +22,8 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
return MCSymbolRefExpr::VK_None;
case SystemZII::MO_GOT:
return MCSymbolRefExpr::VK_GOT;
+ case SystemZII::MO_INDNTPOFF:
+ return MCSymbolRefExpr::VK_INDNTPOFF;
}
llvm_unreachable("Unrecognised MO_ACCESS_MODEL");
}
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 92c2ce7324a..34fc36d6bf6 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -23,11 +23,13 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
unsigned VarArgsFrameIndex;
unsigned RegSaveFrameIndex;
bool ManipulatesSP;
+ unsigned NumLocalDynamics;
public:
explicit SystemZMachineFunctionInfo(MachineFunction &MF)
: LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
- VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {}
+ VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false),
+ NumLocalDynamics(0) {}
// Get and set the first call-saved GPR that should be saved and restored
// by this function. This is 0 if no GPRs need to be saved or restored.
@@ -61,6 +63,10 @@ public:
// e.g. through STACKSAVE or STACKRESTORE.
bool getManipulatesSP() const { return ManipulatesSP; }
void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; }
+
+ // Count number of local-dynamic TLS symbols used.
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index c70e662db42..51ac5daad54 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -90,6 +90,7 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
[SDNPHasChain, SDNPSideEffect, SDNPOptInGlue,
SDNPOutGlue]>;
+def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>;
// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details.
def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
@@ -100,6 +101,12 @@ def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall,
def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
SDT_ZWrapOffset, []>;
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 9b8dc43d94f..73198b1af0a 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -61,6 +61,10 @@ void SystemZPassConfig::addIRPasses() {
bool SystemZPassConfig::addInstSelector() {
addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZLDCleanupPass(getSystemZTargetMachine()));
+
return false;
}
diff --git a/test/CodeGen/SystemZ/tls-01.ll b/test/CodeGen/SystemZ/tls-01.ll
index 16bc8f6e500..da7176c0599 100644
--- a/test/CodeGen/SystemZ/tls-01.ll
+++ b/test/CodeGen/SystemZ/tls-01.ll
@@ -1,7 +1,7 @@
-; Test initial-exec TLS accesses.
+; Test local-exec TLS accesses.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP
@x = thread_local global i32 0
diff --git a/test/CodeGen/SystemZ/tls-02.ll b/test/CodeGen/SystemZ/tls-02.ll
new file mode 100644
index 00000000000..15918d08a93
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-02.ll
@@ -0,0 +1,18 @@
+; Test initial-exec TLS accesses.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN
+
+@x = thread_local(initialexec) global i32 0
+
+; The offset must be loaded from the GOT. This TLS access model does
+; not use literal pool constants.
+define i32 *@foo() {
+; CHECK-MAIN-LABEL: foo:
+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
+; CHECK-MAIN: sllg %r2, [[HIGH]], 32
+; CHECK-MAIN: ear %r2, %a1
+; CHECK-MAIN: larl %r1, x@INDNTPOFF
+; CHECK-MAIN: ag %r2, 0(%r1)
+; CHECK-MAIN: br %r14
+ ret i32 *@x
+}
diff --git a/test/CodeGen/SystemZ/tls-03.ll b/test/CodeGen/SystemZ/tls-03.ll
new file mode 100644
index 00000000000..c9f7bd63290
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-03.ll
@@ -0,0 +1,23 @@
+; Test general-dynamic TLS accesses.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP
+
+@x = thread_local global i32 0
+
+; Call __tls_get_offset to retrieve the symbol's TLS offset.
+define i32 *@foo() {
+; CHECK-CP: .LCP{{.*}}:
+; CHECK-CP: .quad x@TLSGD
+;
+; CHECK-MAIN-LABEL: foo:
+; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_
+; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}}
+; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_gdcall:x
+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
+; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32
+; CHECK-MAIN: ear [[TP]], %a1
+; CHECK-MAIN: agr %r2, [[TP]]
+; CHECK-MAIN: br %r14
+ ret i32 *@x
+}
diff --git a/test/CodeGen/SystemZ/tls-04.ll b/test/CodeGen/SystemZ/tls-04.ll
new file mode 100644
index 00000000000..dcb210a7127
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-04.ll
@@ -0,0 +1,28 @@
+; Test local-dynamic TLS accesses.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP
+
+@x = thread_local(localdynamic) global i32 0
+
+; Call __tls_get_offset to retrieve the module's TLS base offset.
+; Add the per-symbol offset and the thread pointer.
+define i32 *@foo() {
+; CHECK-CP: .LCP{{.*}}_0:
+; CHECK-CP: .quad x@TLSLDM
+; CHECK-CP: .LCP{{.*}}_1:
+; CHECK-CP: .quad x@DTPOFF
+;
+; CHECK-MAIN-LABEL: foo:
+; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_
+; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}}_0
+; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_ldcall:x
+; CHECK-MAIN: larl %r1, .LCP{{.*}}_1
+; CHECK-MAIN: ag %r2, 0(%r1)
+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
+; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32
+; CHECK-MAIN: ear [[TP]], %a1
+; CHECK-MAIN: agr %r2, [[TP]]
+; CHECK-MAIN: br %r14
+ ret i32 *@x
+}
diff --git a/test/CodeGen/SystemZ/tls-05.ll b/test/CodeGen/SystemZ/tls-05.ll
new file mode 100644
index 00000000000..385208d98ca
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-05.ll
@@ -0,0 +1,15 @@
+; Test general-dynamic TLS access optimizations.
+;
+; If we access the same TLS variable twice, there should only be
+; a single call to __tls_get_offset.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1
+
+@x = thread_local global i32 0
+
+define i32 @foo() {
+ %val = load i32* @x
+ %inc = add nsw i32 %val, 1
+ store i32 %inc, i32* @x
+ ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/tls-06.ll b/test/CodeGen/SystemZ/tls-06.ll
new file mode 100644
index 00000000000..fcd8614a793
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-06.ll
@@ -0,0 +1,17 @@
+; Test general-dynamic TLS access optimizations.
+;
+; If we access two different TLS variables, we need two calls to
+; __tls_get_offset, but should load _GLOBAL_OFFSET_TABLE_ only once.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 2
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "_GLOBAL_OFFSET_TABLE_" | count 1
+
+@x = thread_local global i32 0
+@y = thread_local global i32 0
+
+define i32 @foo() {
+ %valx = load i32* @x
+ %valy = load i32* @y
+ %add = add nsw i32 %valx, %valy
+ ret i32 %add
+}
diff --git a/test/CodeGen/SystemZ/tls-07.ll b/test/CodeGen/SystemZ/tls-07.ll
new file mode 100644
index 00000000000..65475159230
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-07.ll
@@ -0,0 +1,16 @@
+; Test local-dynamic TLS access optimizations.
+;
+; If we access two different local-dynamic TLS variables, we only
+; need a single call to __tls_get_offset.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1
+
+@x = thread_local(localdynamic) global i32 0
+@y = thread_local(localdynamic) global i32 0
+
+define i32 @foo() {
+ %valx = load i32* @x
+ %valy = load i32* @y
+ %add = add nsw i32 %valx, %valy
+ ret i32 %add
+}