diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-06-07 12:07:24 -0400 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-06-07 12:07:24 -0400 |
commit | aca34630437817559f674eecdd37fd2e6eea9893 (patch) | |
tree | f0dd8eb0bf6e0d2c381f7985398f3d82f5e88338 /lib/Target/R600/R600Instructions.td | |
parent | 77675fa867e06fc4f7bb2964474c1773f1c1fbf2 (diff) |
XXX: Use correct encoding for Vertex Fetch instructions on Cayman. [cayman-only-bfgminer]
Diffstat (limited to 'lib/Target/R600/R600Instructions.td')
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 392 |
1 files changed, 255 insertions, 137 deletions
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 73767f0ea5e..d6f38cd7f7e 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -261,6 +261,50 @@ class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs, } +class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern> + : InstR600ISA <outs, (ins MEMxi:$ptr), name, pattern>, + VTX_WORD1_GPR { + + // Static fields + let DST_REL = 0; + // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, + // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, + // however, based on my testing if USE_CONST_FIELDS is set, then all + // these fields need to be set to 0. + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 1; + let FORMAT_COMP_ALL = 0; + let SRF_MODE_ALL = 0; + + let Inst{63-32} = Word1; + // LLVM can only encode 64-bit instructions, so these fields are manually + // encoded in R600CodeEmitter + // + // bits<16> OFFSET; + // bits<2> ENDIAN_SWAP = 0; + // bits<1> CONST_BUF_NO_STRIDE = 0; + // bits<1> MEGA_FETCH = 0; + // bits<1> ALT_CONST = 0; + // bits<2> BUFFER_INDEX_MODE = 0; + + // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding + // is done in R600CodeEmitter + // + // Inst{79-64} = OFFSET; + // Inst{81-80} = ENDIAN_SWAP; + // Inst{82} = CONST_BUF_NO_STRIDE; + // Inst{83} = MEGA_FETCH; + // Inst{84} = ALT_CONST; + // Inst{86-85} = BUFFER_INDEX_MODE; + // Inst{95-86} = 0; Reserved + + // VTX_WORD3 (Padding) + // + // Inst{127-96} = 0; + + let VTXInst = 1; +} + class LoadParamFrag <PatFrag load_type> : PatFrag < (ops node:$ptr), (load_type node:$ptr), [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }] @@ -1254,6 +1298,133 @@ def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < [(global_store v4i32:$rw_gpr, i32:$index_gpr)] >; +class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> + : VTX_WORD0_eg, VTX_READ<name, 
buffer_id, outs, pattern> { + + // Static fields + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = buffer_id; + let SRC_REL = 0; + // XXX: We can infer this field based on the SRC_GPR. This would allow us + // to store vertex addresses in any channel, not just X. + let SRC_SEL_X = 0; + + let Inst{31-0} = Word0; +} + +class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_8 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), + pattern> { + + let MEGA_FETCH_COUNT = 1; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 1; // FMT_8 +} + +class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_16 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), + pattern> { + let MEGA_FETCH_COUNT = 2; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 5; // FMT_16 + +} + +class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_32 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), + pattern> { + + let MEGA_FETCH_COUNT = 4; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 0xD; // COLOR_32 + + // This is not really necessary, but there were some GPU hangs that appeared + // to be caused by ALU instructions in the next instruction group that wrote + // to the $ptr registers of the VTX_READ. + // e.g. + // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24 + // %T2_X<def> = MOV %ZERO + //Adding this constraint prevents this from happening. 
+ let Constraints = "$ptr.ptr = $dst"; +} + +class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs R600_Reg128:$dst), + pattern> { + + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 + + // XXX: Need to force VTX_READ_128 instructions to write to the same register + // that holds its buffer address to avoid potential hangs. We can't use + // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst + // registers are different sizes. +} + +//===----------------------------------------------------------------------===// +// VTX Read from parameter memory space +//===----------------------------------------------------------------------===// + +def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, + [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))] +>; + +def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, + [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))] +>; + +def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, + [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))] +>; + +def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, + [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))] +>; + +//===----------------------------------------------------------------------===// +// VTX Read from global memory space +//===----------------------------------------------------------------------===// + +// 8-bit reads +def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, + [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))] +>; + +// 32-bit reads +def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, + [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))] +>; + +// 128-bit reads +def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, + [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))] +>; + +//===----------------------------------------------------------------------===// +// Constant Loads +// XXX: We are currently storing all 
constants in the global address space. +//===----------------------------------------------------------------------===// + +def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, + [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))] +>; + + } // End Predicates = [isEG] //===----------------------------------------------------------------------===// @@ -1408,13 +1579,71 @@ let hasSideEffects = 1 in { let END_OF_PROGRAM = 1; } +} // End Predicates = [isEGorCayman] + //===----------------------------------------------------------------------===// -// Memory read/write instructions +// Regist loads and stores - for indirect addressing //===----------------------------------------------------------------------===// -class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> - : InstR600ISA <outs, (ins MEMxi:$ptr), name, pattern>, - VTX_WORD1_GPR, VTX_WORD0 { +defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>; + +//===----------------------------------------------------------------------===// +// Cayman Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isCayman] in { + +let isVector = 1 in { + +def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; + +def MULLO_INT_cm : MULLO_INT_Common<0x8F>; +def MULHI_INT_cm : MULHI_INT_Common<0x90>; +def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; +def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; +def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; +def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; +def LOG_IEEE_cm : LOG_IEEE_Common<0x83>; +def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; +def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; +def SIN_cm : SIN_Common<0x8D>; +def COS_cm : COS_Common<0x8E>; +} // End isVector = 1 + +def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; +def : SIN_PAT <SIN_cm>; +def : COS_PAT <COS_cm>; + +defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; + +// RECIP_UINT emulation for Cayman +// The multiplication scales from [0,1] to the 
unsigned integer range +def : Pat < + (AMDGPUurecip i32:$src0), + (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), + (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) +>; + + def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { + let ADDR = 0; + let POP_COUNT = 0; + let COUNT = 0; + } + +def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; + + +def RAT_STORE_DWORD_cm : EG_CF_RAT < + 0x57, 0x14, 0x1, (outs), + (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), + "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", + [(global_store i32:$rw_gpr, i32:$index_gpr)] +> { + let eop = 0; // This bit is not used on Cayman. +} + +class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern> + : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> { // Static fields let VC_INST = 0; @@ -1425,53 +1654,18 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> // XXX: We can infer this field based on the SRC_GPR. This would allow us // to store vertex addresses in any channel, not just X. let SRC_SEL_X = 0; - let DST_REL = 0; - // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, - // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, - // however, based on my testing if USE_CONST_FIELDS is set, then all - // these fields need to be set to 0. 
- let USE_CONST_FIELDS = 0; - let NUM_FORMAT_ALL = 1; - let FORMAT_COMP_ALL = 0; - let SRF_MODE_ALL = 0; + let SRC_SEL_Y = 0; + let STRUCTURED_READ = 0; + let LDS_REQ = 0; + let COALESCED_READ = 0; let Inst{31-0} = Word0; - let Inst{63-32} = Word1; - // LLVM can only encode 64-bit instructions, so these fields are manually - // encoded in R600CodeEmitter - // - // bits<16> OFFSET; - // bits<2> ENDIAN_SWAP = 0; - // bits<1> CONST_BUF_NO_STRIDE = 0; - // bits<1> MEGA_FETCH = 0; - // bits<1> ALT_CONST = 0; - // bits<2> BUFFER_INDEX_MODE = 0; - - - - // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding - // is done in R600CodeEmitter - // - // Inst{79-64} = OFFSET; - // Inst{81-80} = ENDIAN_SWAP; - // Inst{82} = CONST_BUF_NO_STRIDE; - // Inst{83} = MEGA_FETCH; - // Inst{84} = ALT_CONST; - // Inst{86-85} = BUFFER_INDEX_MODE; - // Inst{95-86} = 0; Reserved - - // VTX_WORD3 (Padding) - // - // Inst{127-96} = 0; - - let VTXInst = 1; } -class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> - : VTX_READ_eg <"VTX_READ_8 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), +class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_cm <"VTX_READ_8 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), pattern> { - let MEGA_FETCH_COUNT = 1; let DST_SEL_X = 0; let DST_SEL_Y = 7; // Masked let DST_SEL_Z = 7; // Masked @@ -1479,10 +1673,9 @@ class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> let DATA_FORMAT = 1; // FMT_8 } -class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern> - : VTX_READ_eg <"VTX_READ_16 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), +class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_cm <"VTX_READ_16 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), pattern> { - let MEGA_FETCH_COUNT = 2; let DST_SEL_X = 0; let DST_SEL_Y = 7; // Masked let DST_SEL_Z = 7; // Masked @@ -1491,11 +1684,10 @@ class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern> } -class VTX_READ_32_eg <bits<8> buffer_id, 
list<dag> pattern> - : VTX_READ_eg <"VTX_READ_32 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), - pattern> { +class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_cm <"VTX_READ_32 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), + pattern> { - let MEGA_FETCH_COUNT = 4; let DST_SEL_X = 0; let DST_SEL_Y = 7; // Masked let DST_SEL_Z = 7; // Masked @@ -1512,11 +1704,10 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> let Constraints = "$ptr.ptr = $dst"; } -class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> - : VTX_READ_eg <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs R600_Reg128:$dst), +class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_cm <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs R600_Reg128:$dst), pattern> { - let MEGA_FETCH_COUNT = 16; let DST_SEL_X = 0; let DST_SEL_Y = 1; let DST_SEL_Z = 2; @@ -1532,20 +1723,19 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> //===----------------------------------------------------------------------===// // VTX Read from parameter memory space //===----------------------------------------------------------------------===// - -def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, +def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0, [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))] >; -def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, +def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0, [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))] >; -def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, +def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0, [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))] >; -def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, +def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0, [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))] >; @@ -1554,92 +1744,20 @@ def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, //===----------------------------------------------------------------------===// // 8-bit reads -def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, +def 
VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1, [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))] >; // 32-bit reads -def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, +def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1, [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))] >; // 128-bit reads -def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, +def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1, [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))] >; -//===----------------------------------------------------------------------===// -// Constant Loads -// XXX: We are currently storing all constants in the global address space. -//===----------------------------------------------------------------------===// - -def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, - [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))] ->; - -} - -//===----------------------------------------------------------------------===// -// Regist loads and stores - for indirect addressing -//===----------------------------------------------------------------------===// - -defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>; - -//===----------------------------------------------------------------------===// -// Cayman Instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isCayman] in { - -let isVector = 1 in { - -def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; - -def MULLO_INT_cm : MULLO_INT_Common<0x8F>; -def MULHI_INT_cm : MULHI_INT_Common<0x90>; -def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; -def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; -def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; -def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; -def LOG_IEEE_cm : LOG_IEEE_Common<0x83>; -def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; -def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; -def SIN_cm : SIN_Common<0x8D>; -def COS_cm : COS_Common<0x8E>; -} // End isVector = 1 - -def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; -def : SIN_PAT <SIN_cm>; -def : 
COS_PAT <COS_cm>; - -defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; - -// RECIP_UINT emulation for Cayman -// The multiplication scales from [0,1] to the unsigned integer range -def : Pat < - (AMDGPUurecip i32:$src0), - (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), - (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) ->; - - def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { - let ADDR = 0; - let POP_COUNT = 0; - let COUNT = 0; - } - -def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; - - -def RAT_STORE_DWORD_cm : EG_CF_RAT < - 0x57, 0x14, 0x1, (outs), - (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), - "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", - [(global_store i32:$rw_gpr, i32:$index_gpr)] -> { - let eop = 0; // This bit is not used on Cayman. -} - } // End isCayman //===----------------------------------------------------------------------===// @@ -1760,7 +1878,7 @@ def CONST_COPY : Instruction { def TEX_VTX_CONSTBUF : InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr", [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>, - VTX_WORD1_GPR, VTX_WORD0 { + VTX_WORD1_GPR, VTX_WORD0_eg { let VC_INST = 0; let FETCH_TYPE = 2; @@ -1814,7 +1932,7 @@ def TEX_VTX_CONSTBUF : def TEX_VTX_TEXBUF: InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, -VTX_WORD1_GPR, VTX_WORD0 { +VTX_WORD1_GPR, VTX_WORD0_eg { let VC_INST = 0; let FETCH_TYPE = 2; |