diff options
author | tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> | 2012-09-24 15:52:47 +0000 |
---|---|---|
committer | tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> | 2012-09-24 15:52:47 +0000 |
commit | de481a4aa3bb5388783572001598417ea184c03b (patch) | |
tree | 6cd379dd4a595d36e73b99553c6cba1662917a27 | |
parent | 0a04741273c801e192e3012eb385bad9bc10e5d2 (diff) |
R600: Add support for v4f32 stores on R600
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/R600/@164535 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 3 |
-rw-r--r-- | lib/Target/AMDGPU/R600ISelLowering.cpp | 3 |
-rw-r--r-- | lib/Target/AMDGPU/R600Instructions.td | 30 |
-rw-r--r-- | test/CodeGen/R600/store.v4f32.ll | 9 |
4 files changed, 36 insertions, 9 deletions
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 5b1d780beb..aa363e7048 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -156,7 +156,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     return;
   } else {
     switch(MI.getOpcode()) {
-    case AMDGPU::RAT_WRITE_CACHELESS_eg:
+    case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+    case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
       {
         uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
         EmitByte(INSTR_NATIVE, OS);
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 029c7d0f54..76ce188107 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -121,7 +121,8 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
       return BB;
     }

-  case AMDGPU::RAT_WRITE_CACHELESS_eg:
+  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+  case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
     {
       // Convert to DWORD address
       unsigned NewAddr = MRI.createVirtualRegister(
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index e437e6c580..6cb6a83285 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -954,10 +954,8 @@ let Predicates = [isEGorCayman] in {

 let usesCustomInserter = 1 in {

-def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs),
-  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop),
-  "RAT_WRITE_CACHELESS_eg $rw_gpr, $index_gpr, $eop",
-  []>
+class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name> : EG_CF_RAT <
+  0x57, 0x2, 0, (outs), ins, !strconcat(name, " $rw_gpr, $index_gpr, $eop"), []>
 {
   let RIM = 0;
   // XXX: Have a separate instruction for non-indexed writes.
@@ -966,7 +964,7 @@ def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs),
   let ELEM_SIZE = 0;

   let ARRAY_SIZE = 0;
-  let COMP_MASK = 1;
+  let COMP_MASK = comp_mask;
   let BURST_COUNT = 0;
   let VPM = 0;
   let MARK = 0;
@@ -975,16 +973,34 @@ def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs),

 } // End usesCustomInserter = 1

+// 32-bit store
+def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
+  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop),
+  0x1, "RAT_WRITE_CACHELESS_32_eg"
+>;
+
 // i32 global_store
 def : Pat <
   (global_store (i32 R600_TReg32_X:$val), R600_TReg32_X:$ptr),
-  (RAT_WRITE_CACHELESS_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
+  (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
 >;

 // Floating point global_store
 def : Pat <
   (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr),
-  (RAT_WRITE_CACHELESS_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
+  (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
+>;
+
+//128-bit store
+def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
+  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop),
+  0xf, "RAT_WRITE_CACHELESS_128"
+>;
+
+// v4f32 global store
+def : Pat <
+  (global_store (v4f32 R600_Reg128:$val), R600_TReg32_X:$ptr),
+  (RAT_WRITE_CACHELESS_128_eg R600_Reg128:$val, R600_TReg32_X:$ptr, 0)
 >;

 class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern>
diff --git a/test/CodeGen/R600/store.v4f32.ll b/test/CodeGen/R600/store.v4f32.ll
new file mode 100644
index 0000000000..8b0d244459
--- /dev/null
+++ b/test/CodeGen/R600/store.v4f32.ll
@@ -0,0 +1,9 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %1 = load <4 x float> addrspace(1) * %in
+  store <4 x float> %1, <4 x float> addrspace(1)* %out
+  ret void
+}