summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2013-06-12 09:41:59 -0700
committerTom Stellard <thomas.stellard@amd.com>2013-06-17 09:12:26 -0700
commitb4b0543e5c770c872a01c9ca894c8a3d2759e87c (patch)
treee95633c19fb2e3cd7e26b4676abf24efd865a9d2
parent0d16b1e06c6d5e2fc06721e92a5c9a11772b73b1 (diff)
R600: Add support for GROUP_BARRIER instruction
-rw-r--r--lib/Target/R600/AMDGPUIntrinsics.td2
-rw-r--r--lib/Target/R600/R600EmitClauseMarkers.cpp9
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp10
-rw-r--r--lib/Target/R600/R600InstrInfo.h2
-rw-r--r--lib/Target/R600/R600Instructions.td30
-rw-r--r--lib/Target/R600/R600MachineScheduler.cpp6
-rw-r--r--lib/Target/R600/R600Packetizer.cpp8
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll24
8 files changed, 88 insertions, 3 deletions
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index eecb25b04f7..9f975bf9bf4 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -50,6 +50,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+
+ def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
}
let TargetPrefix = "TGSI", isTarget = 1 in {
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
index ff5ce5a2dd7..0aea2d7c030 100644
--- a/lib/Target/R600/R600EmitClauseMarkers.cpp
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -177,7 +177,14 @@ private:
AluInstCount ++;
continue;
}
- if (I->getOpcode() == AMDGPU::KILLGT) {
+ // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
+ //
+ // * KILL or INTERP instructions
+ // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
+ // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
+ //
+ // XXX: These checks have not been implemented yet.
+ if (TII->mustBeLastInClause(I->getOpcode())) {
I++;
break;
}
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index adfe144a267..d0aeaa6de5c 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -163,6 +163,16 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
usesTextureCache(MI->getOpcode());
}
+bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
+ switch (Opcode) {
+ case AMDGPU::KILLGT:
+ case AMDGPU::GROUP_BARRIER:
+ return true;
+ default:
+ return false;
+ }
+}
+
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index f06abf6081c..3c2e50be662 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -72,6 +72,8 @@ namespace llvm {
bool usesTextureCache(unsigned Opcode) const;
bool usesTextureCache(const MachineInstr *MI) const;
+ bool mustBeLastInClause(unsigned Opcode) const;
+
/// \returns a pair for each src of an ALU instructions.
/// The first member of a pair is the register id.
/// If register is ALU_CONST, second member is SEL.
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 31e103d6391..5231fdeb069 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1489,6 +1489,36 @@ let hasSideEffects = 1 in {
def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
+def GROUP_BARRIER : InstR600 <
+ (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <0x54> {
+
+ let dst = 0;
+ let dst_rel = 0;
+ let src0 = 0;
+ let src0_rel = 0;
+ let src0_neg = 0;
+ let src0_abs = 0;
+ let src1 = 0;
+ let src1_rel = 0;
+ let src1_neg = 0;
+ let src1_abs = 0;
+ let write = 0;
+ let omod = 0;
+ let clamp = 0;
+ let last = 1;
+ let bank_swizzle = 0;
+ let pred_sel = 0;
+ let update_exec_mask = 0;
+ let update_pred = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+ let ALUInst = 1;
+}
+
// TRUNC is used for the FLT_TO_INT instructions to work around a
// perceived problem where the rounding modes are applied differently
// depending on the instruction and the slot they are in.
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index a330d885743..acc1b4d6ee3 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -269,10 +269,14 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
}
// Does the instruction take a whole IG ?
+ // XXX: Is it possible to add a helper function in R600InstrInfo that can
+ // be used here and in R600PacketizerList::isSoloInstruction() ?
if(TII->isVector(*MI) ||
TII->isCubeOp(MI->getOpcode()) ||
- TII->isReductionOp(MI->getOpcode()))
+ TII->isReductionOp(MI->getOpcode()) ||
+ MI->getOpcode() == AMDGPU::GROUP_BARRIER) {
return AluT_XYZW;
+ }
// Is the result already assigned to a channel ?
unsigned DestSubReg = MI->getOperand(0).getSubReg();
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index 6024fd5c85f..4c72d229675 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -82,7 +82,11 @@ private:
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
continue;
- unsigned Dst = BI->getOperand(0).getReg();
+ int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
+ if (DstIdx == -1) {
+ continue;
+ }
+ unsigned Dst = BI->getOperand(DstIdx).getReg();
if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
BI->getOpcode() == AMDGPU::DOT4_eg) {
Result[Dst] = AMDGPU::PV_X;
@@ -154,6 +158,8 @@ public:
return true;
if (TII->isTransOnly(MI))
return true;
+ if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
+ return true;
return false;
}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
new file mode 100644
index 00000000000..8d3c9ca2230
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: GROUP_BARRIER
+
+define void @test(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tidig.x()
+ %1 = getelementptr i32 addrspace(1)* %out, i32 %0
+ store i32 %0, i32 addrspace(1)* %1
+ call void @llvm.AMDGPU.barrier.local()
+ %2 = call i32 @llvm.r600.read.local.size.x()
+ %3 = sub i32 %2, 1
+ %4 = sub i32 %3, %0
+ %5 = getelementptr i32 addrspace(1)* %out, i32 %4
+ %6 = load i32 addrspace(1)* %5
+ store i32 %6, i32 addrspace(1)* %1
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare void @llvm.AMDGPU.barrier.local()
+declare i32 @llvm.r600.read.local.size.x() #0
+
+attributes #0 = { readnone }