diff options
author | Vincent Lejeune <vljn@ovi.com> | 2012-12-08 18:10:19 +0100 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2012-12-11 16:41:03 +0000 |
commit | 6929a40a5def73068f9ebe6ac72313ae56214264 (patch) | |
tree | 26c46ebe97255bfe25070d3846f560d7030b11c0 | |
parent | 674377330063bf34caaa143babe6ca636e4bbc2a (diff) |
R600: Add an intrinsic to handle stream outputs.
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
-rw-r--r-- | lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 2 |
-rw-r--r-- | lib/Target/AMDGPU/R600ISelLowering.cpp | 31 |
-rw-r--r-- | lib/Target/AMDGPU/R600Instructions.td | 65 |
-rw-r--r-- | lib/Target/AMDGPU/R600Intrinsics.td | 2 |
-rw-r--r-- | lib/Target/AMDGPU/R600MachineFunctionInfo.cpp | 1 |
-rw-r--r-- | lib/Target/AMDGPU/R600MachineFunctionInfo.h | 1 |
6 files changed, 102 insertions, 0 deletions
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 463dbaf6877..dc91924c73e 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -175,6 +175,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, } case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportSwz: + case AMDGPU::EG_ExportBuf: + case AMDGPU::R600_ExportBuf: { uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); EmitByte(INSTR_EXPORT, OS); Emit(Inst, OS); diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 706111601f0..eaeff4ec217 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -386,8 +386,39 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2), Chain); + } + case AMDGPUIntrinsic::R600_store_stream_output : { + MachineFunction &MF = DAG.getMachineFunction(); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + int64_t BufIndex = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + + SDNode **OutputsMap = MFI->StreamOutputs[BufIndex]; + unsigned Inst; + switch (cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue() ) { + // STREAM3 + case 3: + Inst = 4; + break; + // STREAM2 + case 2: + Inst = 3; + break; + // STREAM1 + case 1: + Inst = 2; + break; + // STREAM0 + case 0: + Inst = 1; + break; + default: + assert(0 && "Wrong buffer id for stream outputs !"); } + return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, + RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2), + Chain); } // default for switch(IntrinsicID) default: break; diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 
2dec16d7699..105822066c2 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -492,6 +492,18 @@ class ExportSwzWord1 { let Word1{11-9} = sw_w; } +class ExportBufWord1 { + field bits<32> Word1; + + bits<12> arraySize; + bits<4> compMask; + bits<1> eop; + bits<8> inst; + + let Word1{11-0} = arraySize; + let Word1{15-12} = compMask; +} + multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), (ExportInst @@ -517,6 +529,30 @@ multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { >; } +multiclass SteamOutputExportPattern<Instruction ExportInst, + bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> { +// Stream0 + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 4095, imm:$mask, buf0inst, 0)>; +// Stream1 + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 2), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 4095, imm:$mask, buf1inst, 0)>; +// Stream2 + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 3), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 4095, imm:$mask, buf2inst, 0)>; +// Stream3 + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 4), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 4095, imm:$mask, buf3inst, 0)>; +} + let isTerminator = 1, usesCustomInserter = 1 in { class ExportSwzInst : InstR600ISA<( @@ -533,6 +569,16 @@ class ExportSwzInst : InstR600ISA<( } // End isTerminator = 1, usesCustomInserter = 1 +class ExportBufInst : InstR600ISA<( + outs), + (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase, + i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop), + 
!strconcat("EXPORT", " $gpr"), + []>, ExportWord0, ExportBufWord1 { + let elem_size = 0; + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; +} let Predicates = [isR600toCayman] in { @@ -983,6 +1029,15 @@ let Predicates = [isR600] in { let Word1{31} = 1; // BARRIER } defm : ExportPattern<R600_ExportSwz, 39>; + + def R600_ExportBuf : ExportBufInst { + let Word1{20-17} = 1; // BURST_COUNT + let Word1{21} = eop; + let Word1{22} = 1; // VALID_PIXEL_MODE + let Word1{30-23} = inst; + let Word1{31} = 1; // BARRIER + } + defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>; } // Helper pattern for normalizing inputs to triginomic instructions for R700+ @@ -1123,6 +1178,16 @@ let Predicates = [isEGorCayman] in { } defm : ExportPattern<EG_ExportSwz, 83>; + def EG_ExportBuf : ExportBufInst { + let Word1{19-16} = 1; // BURST_COUNT + let Word1{20} = 1; // VALID_PIXEL_MODE + let Word1{21} = eop; + let Word1{29-22} = inst; + let Word1{30} = 0; // MARK + let Word1{31} = 1; // BARRIER + } + defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>; + //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td index 3a7e3ee0d0c..3825bc4d3b0 100644 --- a/lib/Target/AMDGPU/R600Intrinsics.td +++ b/lib/Target/AMDGPU/R600Intrinsics.td @@ -19,6 +19,8 @@ let TargetPrefix = "R600", isTarget = 1 in { Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; def int_R600_load_input_linear : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; + def int_R600_store_stream_output : + Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_R600_store_pixel_color : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; def int_R600_store_pixel_depth : diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp 
b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp index bcb7f943606..4eb5efa19f0 100644 --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp @@ -17,6 +17,7 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) HasLinearInterpolation(false), HasPerspectiveInterpolation(false) { memset(Outputs, 0, sizeof(Outputs)); + memset(StreamOutputs, 0, sizeof(StreamOutputs)); } unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const { diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h index 91f9de28270..e97fb5be62e 100644 --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h @@ -25,6 +25,7 @@ public: R600MachineFunctionInfo(const MachineFunction &MF); std::vector<unsigned> ReservedRegs; SDNode *Outputs[16]; + SDNode *StreamOutputs[64][4]; bool HasLinearInterpolation; bool HasPerspectiveInterpolation; |