summary | refs | log | tree | commit | diff
path: root/kernels
diff options
context:
space:
mode:
Diffstat (limited to 'kernels')
-rw-r--r-- kernels/compiler_atomic_functions.cl | 19
1 file changed, 13 insertions, 6 deletions
diff --git a/kernels/compiler_atomic_functions.cl b/kernels/compiler_atomic_functions.cl
index 61ce2f44..fbc16fbf 100644
--- a/kernels/compiler_atomic_functions.cl
+++ b/kernels/compiler_atomic_functions.cl
@@ -1,14 +1,21 @@
__kernel void compiler_atomic_functions(__global int *dst, __local int *tmp, __global int *src) {
int lid = get_local_id(0);
int i = lid % 12;
- atomic_xchg(&tmp[4], -1);
+ if(lid == 0) {
+ for(int j=0; j<12; j=j+1) {
+ atomic_xchg(&tmp[j], 0);
+ }
+ atomic_xchg(&tmp[4], -1);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
switch(i) {
case 0: atomic_inc(&tmp[i]); break;
case 1: atomic_dec(&tmp[i]); break;
case 2: atomic_add(&tmp[i], src[lid]); break;
case 3: atomic_sub(&tmp[i], src[lid]); break;
- case 4: atomic_and(&tmp[i], ~(src[lid]<<(lid / 4))); break;
- case 5: atomic_or (&tmp[i], src[lid]<<(lid / 4)); break;
+ case 4: atomic_and(&tmp[i], ~(src[lid]<<(lid / 16))); break;
+ case 5: atomic_or (&tmp[i], src[lid]<<(lid / 16)); break;
case 6: atomic_xor(&tmp[i], src[lid]); break;
case 7: atomic_min(&tmp[i], -src[lid]); break;
case 8: atomic_max(&tmp[i], src[lid]); break;
@@ -23,8 +30,8 @@ __kernel void compiler_atomic_functions(__global int *dst, __local int *tmp, __g
case 1: atomic_dec(&dst[i]); break;
case 2: atomic_add(&dst[i], src[lid]); break;
case 3: atomic_sub(&dst[i], src[lid]); break;
- case 4: atomic_and(&dst[i], ~(src[lid]<<(lid / 4))); break;
- case 5: atomic_or (&dst[i], src[lid]<<(lid / 4)); break;
+ case 4: atomic_and(&dst[i], ~(src[lid]<<(lid / 16))); break;
+ case 5: atomic_or (&dst[i], src[lid]<<(lid / 16)); break;
case 6: atomic_xor(&dst[i], src[lid]); break;
case 7: atomic_min(&dst[i], -src[lid]); break;
case 8: atomic_max(&dst[i], src[lid]); break;
@@ -38,6 +45,6 @@ __kernel void compiler_atomic_functions(__global int *dst, __local int *tmp, __g
if(get_global_id(0) == 0) {
for(i=0; i<12; i=i+1)
- atomic_add(&dst[i], tmp[i]);
+ atomic_xchg(&dst[i+12], tmp[i]);
}
}