/*!

[config]
name: MUBUF stack addressing behavior
clc_version_min: 10

[test]
name: MUBUF negative buffer offsets
kernel_name: negative_mubuf_vaddr
dimensions: 1
global_size: 16 0 0

arg_out: 0 buffer int[16]        \
  5 5 5 5 \
  5 5 5 5 \
  5 5 5 5 \
  5 5 5 5

!*/

// Prior to gfx9, MUBUF instructions with the vaddr offset enabled
// always perform a range check. A negative vaddr base index fails
// that range check, even though the overall address computation
// (vaddr plus the immediate offset) would produce a valid in-bounds
// address; the range check prevents the access from ever reaching
// that address. Out-of-bounds MUBUF loads return 0.
//
// Therefore it should be safe to fold any VGPR offset on gfx9 into
// the MUBUF vaddr, but not on older subtargets, which can only do
// this if the sign bit is known to be 0.
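//
// A concrete sketch of the arithmetic (derived from the assembly
// quoted in the kernel below, not from hardware documentation): with
// a base index of -8, the compiler emits
//
//   vaddr = (-8 << 2) + 4 = -28
//
// and the load carries an immediate offset of 48, so the final
// address is -28 + 48 = 20, exactly the byte offset used by the
// direct array[4] access. On pre-gfx9 hardware, however, vaddr read
// as unsigned is a huge value that fails the range check, so the
// load is treated as out of bounds and returns 0 instead of 5.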
kernel void negative_mubuf_vaddr(global int* out0)
{
    volatile int array[16];
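    // (volatile keeps the array in scratch memory instead of being
    // promoted to registers, so accesses go through MUBUF buffer
    // instructions against the stack.)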

    int id = get_global_id(0);
    for (int i = 0; i < 16; ++i)
    {
        array[i] = i + 1;
    }

    // Loading directly from the same buffer address works without using vaddr:
    //
    // buffer_load_dword v2, off, s[0:3], s11 offset:20
    // out0[id] = array[4];


    // But having a negative computed base index would fail:
    // v_mov_b32_e32 v0, -8
    // v_lshlrev_b32_e32 v0, 2, v0
    // v_add_i32_e32 v0, vcc, 4, v0
    // buffer_load_dword v2, v0, s[0:3], s11 offen offset:48

#ifdef __AMDGCN__
    // Obscure the value so it can't be folded with other constants
    // and the compiler can't make known-bits assumptions about it.
    int offset;
    __asm volatile("v_mov_b32 %0, -8" : "=v"(offset));
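    // (The "=v" constraint places the result in a VGPR; because the
    // volatile asm is opaque to the compiler, nothing is known about
    // the sign bit of 'offset' at compile time.)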
#else
    int offset = -8;
#endif
    out0[id] = array[offset + 12];
}
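
// Usage note (an assumption about the surrounding test harness, not
// part of the original file): piglit's OpenCL program tests are
// normally run through its cl-program-tester binary, e.g.
//
//   bin/cl-program-tester tests/cl/program/execute/amdgcn-mubuf-negative-vaddr.cl
//
// which parses the [config]/[test] header above, launches the kernel
// over a global size of 16, and checks that every element of out0 is
// 5 (array[-8 + 12] == array[4] == 5).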