1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
; Opaque handle type used for the image argument of the read helpers; its
; layout is not defined anywhere in this chunk.
%opencl.image2d_t = type opaque
; Texture intrinsic: takes a <4 x float> operand followed by three i32
; operands and returns <4 x float>. No live code visible in this chunk calls
; it; only the commented-out helpers further down reference it.
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
; Same shape as @llvm.AMDGPU.tex, except that the second operand is an
; %opencl.image2d_t pointer instead of an i32.
; NOTE(review): presumably a placeholder that a later pass rewrites into
; @llvm.AMDGPU.tex with immediate operands -- confirm against that pass.
declare <4 x float> @llvm.AMDGPU.dummytex(<4 x float>, %opencl.image2d_t*, i32, i32) readnone
; NOTE: image (the resource ID) and sampler (the sampler ID) must be immediates, so they are patched in by a separate LLVM pass.
; define <4 x float> @__read_imagef_norm(i32 %image, i32 %sampler, <4 x float> %coord) nounwind readnone alwaysinline {
; %call = call <4 x float> @llvm.AMDGPU.tex (<4 x float> %coord, i32 0, i32 0, i32 1)
; ret <4 x float> %call
; }
; define <4 x float> @__read_imagef_unorm(i32 %image, i32 %sampler, <4 x float> %coord) nounwind readnone alwaysinline {
; %call = call <4 x float> @llvm.AMDGPU.tex (<4 x float> %coord, i32 0, i32 0, i32 0)
; ret <4 x float> %call
; }
; __read_imagef: sample %image at the 2-component coordinate %coord.
;
; The coordinate is widened to a 4-lane vector (x, y, 0.0, 0.0) and passed to
; @llvm.AMDGPU.dummytex together with the image pointer and the sampler value.
; The last operand of the call is the constant 1 when %sampler is non-zero and
; the constant 0 when %sampler is zero. Each case gets its own call so that
; the operand stays a literal constant.
; NOTE(review): by analogy with the commented-out __read_imagef_norm /
; __read_imagef_unorm helpers, the last operand presumably selects normalized
; (1) versus unnormalized (0) coordinates -- confirm.
;
; Returns the <4 x float> produced by whichever call was executed.
define <4 x float> @__read_imagef(%opencl.image2d_t* nocapture %image, i32 %sampler, <2 x float> %coord) alwaysinline {
entry:
  ; Pull both coordinate components out first, then build the 4-lane vector.
  %x = extractelement <2 x float> %coord, i32 0
  %y = extractelement <2 x float> %coord, i32 1
  %coord.x = insertelement <4 x float> undef, float %x, i32 0
  %coord.xy = insertelement <4 x float> %coord.x, float %y, i32 1
  %coord.xyz = insertelement <4 x float> %coord.xy, float 0.000000e+00, i32 2
  %coord.xyzw = insertelement <4 x float> %coord.xyz, float 0.000000e+00, i32 3
  ; Dispatch on whether the sampler value as a whole is non-zero.
  %sampler.nonzero = icmp ne i32 %sampler, 0
  br i1 %sampler.nonzero, label %sampler.set, label %sampler.zero

sampler.set:                                      ; preds = %entry
  %texel.set = call <4 x float> @llvm.AMDGPU.dummytex(<4 x float> %coord.xyzw, %opencl.image2d_t* %image, i32 %sampler, i32 1)
  br label %done

sampler.zero:                                     ; preds = %entry
  %texel.zero = call <4 x float> @llvm.AMDGPU.dummytex(<4 x float> %coord.xyzw, %opencl.image2d_t* %image, i32 %sampler, i32 0)
  br label %done

done:                                             ; preds = %sampler.zero, %sampler.set
  ; Merge the result of whichever call ran.
  %texel = phi <4 x float> [ %texel.set, %sampler.set ], [ %texel.zero, %sampler.zero ]
  ret <4 x float> %texel
}
|