diff options
Diffstat (limited to 'test/CodeGen/R600/ds_read2.ll')
-rw-r--r-- | test/CodeGen/R600/ds_read2.ll | 148 |
1 files changed, 74 insertions, 74 deletions
diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll index 388d21ba08c..b431b5fe97c 100644 --- a/test/CodeGen/R600/ds_read2.ll +++ b/test/CodeGen/R600/ds_read2.ll @@ -7,11 +7,11 @@ @lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8 ; SI-LABEL: @simple_read2_f32 -; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8 -; SI: S_WAITCNT lgkmcnt(0) -; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] -; SI: BUFFER_STORE_DWORD [[RESULT]] -; SI: S_ENDPGM +; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8 +; SI: s_waitcnt lgkmcnt(0) +; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm define void @simple_read2_f32(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -26,11 +26,11 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2_f32_max_offset -; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255 -; SI: S_WAITCNT lgkmcnt(0) -; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] -; SI: BUFFER_STORE_DWORD [[RESULT]] -; SI: S_ENDPGM +; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255 +; SI: s_waitcnt lgkmcnt(0) +; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -45,10 +45,10 @@ define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2_f32_too_far -; SI-NOT DS_READ2_B32 -; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} -; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028 -; SI: S_ENDPGM +; SI-NOT ds_read2_b32 +; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028 +; SI: s_endpgm define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -63,9 +63,9 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2_f32_x2 -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8 -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 -; SI: S_ENDPGM +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 +; SI: s_endpgm define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 %idx.0 = add nsw i32 %tid.x, 0 @@ -94,10 +94,10 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { ; Make sure there is an instruction between the two sets of reads. ; SI-LABEL: @simple_read2_f32_x2_barrier -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8 -; SI: S_BARRIER -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 -; SI: S_ENDPGM +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8 +; SI: s_barrier +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 +; SI: s_endpgm define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 { %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 %idx.0 = add nsw i32 %tid.x, 0 @@ -130,9 +130,9 @@ define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 { ; element results in only folding the inner pair. ; SI-LABEL: @simple_read2_f32_x2_nonzero_base -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8 -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 -; SI: S_ENDPGM +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 +; SI: s_endpgm define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 { %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 %idx.0 = add nsw i32 %tid.x, 2 @@ -166,10 +166,10 @@ define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 { ; register. We can't safely merge this. ; SI-LABEL: @read2_ptr_is_subreg_arg_f32 -; SI-NOT: DS_READ2_B32 -; SI: DS_READ_B32 -; SI: DS_READ_B32 -; SI: S_ENDPGM +; SI-NOT: ds_read2_b32 +; SI: ds_read_b32 +; SI: ds_read_b32 +; SI: s_endpgm define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0 @@ -192,10 +192,10 @@ define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float ad ; subregisters. ; SI-LABEL: @read2_ptr_is_subreg_arg_offset_f32 -; SI-NOT: DS_READ2_B32 -; SI: DS_READ_B32 -; SI: DS_READ_B32 -; SI: S_ENDPGM +; SI-NOT: ds_read2_b32 +; SI: ds_read_b32 +; SI: ds_read_b32 +; SI: s_endpgm define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0 @@ -217,10 +217,10 @@ define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x f } ; We should be able to merge in this case, but probably not worth the effort. -; SI-NOT: DS_READ2_B32 -; SI: DS_READ_B32 -; SI: DS_READ_B32 -; SI: S_ENDPGM +; SI-NOT: ds_read2_b32 +; SI: ds_read_b32 +; SI: ds_read_b32 +; SI: s_endpgm define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0 @@ -241,10 +241,10 @@ define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2_f32_volatile_0 -; SI-NOT DS_READ2_B32 -; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} -; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 -; SI: S_ENDPGM +; SI-NOT ds_read2_b32 +; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 +; SI: s_endpgm define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -259,10 +259,10 @@ define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2_f32_volatile_1 -; SI-NOT DS_READ2_B32 -; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} -; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 -; SI: S_ENDPGM +; SI-NOT ds_read2_b32 +; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 +; SI: s_endpgm define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -280,8 +280,8 @@ define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 { ; XXX: This isn't really testing anything useful now. I think CI ; allows unaligned LDS accesses, which would be a problem here. ; SI-LABEL: @unaligned_read2_f32 -; SI-NOT: DS_READ2_B32 -; SI: S_ENDPGM +; SI-NOT: ds_read2_b32 +; SI: s_endpgm define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i @@ -296,8 +296,8 @@ define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* % } ; SI-LABEL: @misaligned_2_simple_read2_f32 -; SI-NOT: DS_READ2_B32 -; SI: S_ENDPGM +; SI-NOT: ds_read2_b32 +; SI: s_endpgm define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i @@ -312,11 +312,11 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs } ; SI-LABEL: @simple_read2_f64 -; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}} -; SI: DS_READ2_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8 -; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} -; SI: BUFFER_STORE_DWORDX2 [[RESULT]] -; SI: S_ENDPGM +; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}} +; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8 +; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} +; SI: buffer_store_dwordx2 [[RESULT]] +; SI: s_endpgm define void @simple_read2_f64(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i @@ -331,8 +331,8 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2_f64_max_offset -; SI: DS_READ2_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255 -; SI: S_ENDPGM +; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255 +; SI: s_endpgm define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i @@ -347,10 +347,10 @@ define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2_f64_too_far -; SI-NOT DS_READ2_B64 -; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} -; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056 -; SI: S_ENDPGM +; SI-NOT ds_read2_b64 +; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} +; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056 +; SI: s_endpgm define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i @@ -366,9 +366,9 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 { ; Alignment only 4 ; SI-LABEL: @misaligned_read2_f64 -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1 -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15 -; SI: S_ENDPGM +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15 +; SI: s_endpgm define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i @@ -385,8 +385,8 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3) @foo = addrspace(3) global [4 x i32] zeroinitializer, align 4 ; SI-LABEL: @load_constant_adjacent_offsets -; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1 +; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1 define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4 @@ -396,8 +396,8 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { } ; SI-LABEL: @load_constant_disjoint_offsets -; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2 +; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2 define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4 @@ -409,9 +409,9 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { @bar = addrspace(3) global [4 x i64] zeroinitializer, align 4 ; SI-LABEL: @load_misaligned64_constant_offsets -; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1 -; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3 +; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3 define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { %val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 %val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 @@ -423,11 +423,11 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { @bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4 ; SI-LABEL: @load_misaligned64_constant_large_offsets -; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}} -; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000 -; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1 -; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1 -; SI: S_ENDPGM +; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}} +; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000 +; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1 +; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1 +; SI: s_endpgm define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) { %val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4 %val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4 |