From d4f06bd0ecd05b7b27f9bcd89aa3d1325e60940d Mon Sep 17 00:00:00 2001
From: Pan Xiuli
Date: Wed, 19 Oct 2016 14:37:14 +0800
Subject: Libocl: Add sub group broadcast short builtin function

Add sub group broadcast and intel sub group broadcast for short type.

Signed-off-by: Pan Xiuli
Reviewed-by: Yang Rong
---
 backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 9 +++++++++
 backend/src/libocl/tmpl/ocl_simd.tmpl.h  | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
index 245ce8a3..d1bcfa36 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
@@ -50,8 +50,17 @@ BROADCAST_IMPL(ulong)
 BROADCAST_IMPL(half)
 BROADCAST_IMPL(float)
 BROADCAST_IMPL(double)
+BROADCAST_IMPL(short)
+BROADCAST_IMPL(ushort)
 #undef BROADCAST_IMPL
 
+OVERLOADABLE short intel_sub_group_broadcast(short a, uint local_id) {
+  return __gen_ocl_sub_group_broadcast(a, local_id);
+}
+
+OVERLOADABLE ushort intel_sub_group_broadcast(ushort a, uint local_id) {
+  return __gen_ocl_sub_group_broadcast(a, local_id);
+}
 
 #define RANGE_OP(RANGE, OP, GEN_TYPE, SIGN) \
   OVERLOADABLE GEN_TYPE __gen_ocl_sub_group_##RANGE##_##OP(bool sign, GEN_TYPE x); \
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index e8dc6f41..c609c2ec 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -42,7 +42,11 @@ OVERLOADABLE ulong sub_group_broadcast(ulong a, uint local_id);
 OVERLOADABLE half sub_group_broadcast(half a, uint local_id);
 OVERLOADABLE float sub_group_broadcast(float a, uint local_id);
 OVERLOADABLE double sub_group_broadcast(double a, uint local_id);
+OVERLOADABLE short sub_group_broadcast(short a,uint local_id);
+OVERLOADABLE ushort sub_group_broadcast(ushort a, uint local_id);
 
+OVERLOADABLE short intel_sub_group_broadcast(short a, uint local_id);
+OVERLOADABLE ushort intel_sub_group_broadcast(ushort a, uint local_id);
 /* reduce add */
 OVERLOADABLE int sub_group_reduce_add(int x);
 OVERLOADABLE uint sub_group_reduce_add(uint x);
-- 
cgit v1.2.3