summaryrefslogtreecommitdiff
path: root/generic
diff options
context:
space:
mode:
author Peter Collingbourne <peter@pcc.me.uk> 2012-01-08 22:09:58 +0000
committer Peter Collingbourne <peter@pcc.me.uk> 2012-01-08 22:09:58 +0000
commit 6937ba961c6fb0f59b53f3b22c5aef78982b10d5 (patch)
tree 7a74c51a5dca9603f69d5f7fe6b6068f4b21c7d2 /generic
parent 256deb0078b9b1c04e0eeba559ca3a2887fc4551 (diff)
Initial commit.
git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@147756 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'generic')
-rw-r--r--generic/include/clc/as_type.h53
-rw-r--r--generic/include/clc/clc.h57
-rw-r--r--generic/include/clc/clcfunc.h4
-rw-r--r--generic/include/clc/clcmacro.h42
-rw-r--r--generic/include/clc/clctypes.h74
-rw-r--r--generic/include/clc/gentype.inc51
-rw-r--r--generic/include/clc/geometric/cross.h2
-rw-r--r--generic/include/clc/geometric/distance.h2
-rw-r--r--generic/include/clc/geometric/dot.h2
-rw-r--r--generic/include/clc/geometric/floatn.inc35
-rw-r--r--generic/include/clc/geometric/length.h2
-rw-r--r--generic/include/clc/geometric/length.inc1
-rw-r--r--generic/include/clc/geometric/normalize.h2
-rw-r--r--generic/include/clc/geometric/normalize.inc1
-rw-r--r--generic/include/clc/integer/abs.h2
-rw-r--r--generic/include/clc/integer/abs.inc1
-rw-r--r--generic/include/clc/integer/abs_diff.h2
-rw-r--r--generic/include/clc/integer/abs_diff.inc1
-rw-r--r--generic/include/clc/integer/add_sat.h2
-rw-r--r--generic/include/clc/integer/add_sat.inc1
-rw-r--r--generic/include/clc/integer/gentype.inc385
-rw-r--r--generic/include/clc/math/cos.h6
-rw-r--r--generic/include/clc/math/native_cos.h1
-rw-r--r--generic/include/clc/math/native_divide.h1
-rw-r--r--generic/include/clc/math/native_sin.h1
-rw-r--r--generic/include/clc/math/native_sqrt.h1
-rw-r--r--generic/include/clc/math/sin.h6
-rw-r--r--generic/include/clc/math/sqrt.h6
-rw-r--r--generic/include/clc/math/unary_decl.inc1
-rw-r--r--generic/include/clc/math/unary_intrin.inc18
-rw-r--r--generic/include/clc/relational/select.h1
-rw-r--r--generic/include/clc/synchronization/cl_mem_fence_flags.h4
-rw-r--r--generic/lib/SOURCES8
-rw-r--r--generic/lib/geometric/cross.cl11
-rw-r--r--generic/lib/geometric/dot.cl17
-rw-r--r--generic/lib/geometric/length.cl4
-rw-r--r--generic/lib/geometric/length.inc3
-rw-r--r--generic/lib/geometric/normalize.cl4
-rw-r--r--generic/lib/geometric/normalize.inc3
-rw-r--r--generic/lib/integer/abs.cl4
-rw-r--r--generic/lib/integer/abs.inc3
-rw-r--r--generic/lib/integer/abs_diff.cl4
-rw-r--r--generic/lib/integer/abs_diff.inc3
-rw-r--r--generic/lib/integer/add_sat.cl52
-rw-r--r--generic/lib/integer/add_sat.ll55
-rw-r--r--generic/lib/integer/add_sat_impl.ll83
46 files changed, 1022 insertions, 0 deletions
diff --git a/generic/include/clc/as_type.h b/generic/include/clc/as_type.h
new file mode 100644
index 0000000..ef7b2b1
--- /dev/null
+++ b/generic/include/clc/as_type.h
@@ -0,0 +1,53 @@
+#define as_char(x) __builtin_astype(x, char)
+#define as_uchar(x) __builtin_astype(x, uchar)
+#define as_short(x) __builtin_astype(x, short)
+#define as_ushort(x) __builtin_astype(x, ushort)
+#define as_int(x) __builtin_astype(x, int)
+#define as_uint(x) __builtin_astype(x, uint)
+#define as_long(x) __builtin_astype(x, long)
+#define as_ulong(x) __builtin_astype(x, ulong)
+
+#define as_char2(x) __builtin_astype(x, char2)
+#define as_uchar2(x) __builtin_astype(x, uchar2)
+#define as_short2(x) __builtin_astype(x, short2)
+#define as_ushort2(x) __builtin_astype(x, ushort2)
+#define as_int2(x) __builtin_astype(x, int2)
+#define as_uint2(x) __builtin_astype(x, uint2)
+#define as_long2(x) __builtin_astype(x, long2)
+#define as_ulong2(x) __builtin_astype(x, ulong2)
+
+#define as_char3(x) __builtin_astype(x, char3)
+#define as_uchar3(x) __builtin_astype(x, uchar3)
+#define as_short3(x) __builtin_astype(x, short3)
+#define as_ushort3(x) __builtin_astype(x, ushort3)
+#define as_int3(x) __builtin_astype(x, int3)
+#define as_uint3(x) __builtin_astype(x, uint3)
+#define as_long3(x) __builtin_astype(x, long3)
+#define as_ulong3(x) __builtin_astype(x, ulong3)
+
+#define as_char4(x) __builtin_astype(x, char4)
+#define as_uchar4(x) __builtin_astype(x, uchar4)
+#define as_short4(x) __builtin_astype(x, short4)
+#define as_ushort4(x) __builtin_astype(x, ushort4)
+#define as_int4(x) __builtin_astype(x, int4)
+#define as_uint4(x) __builtin_astype(x, uint4)
+#define as_long4(x) __builtin_astype(x, long4)
+#define as_ulong4(x) __builtin_astype(x, ulong4)
+
+#define as_char8(x) __builtin_astype(x, char8)
+#define as_uchar8(x) __builtin_astype(x, uchar8)
+#define as_short8(x) __builtin_astype(x, short8)
+#define as_ushort8(x) __builtin_astype(x, ushort8)
+#define as_int8(x) __builtin_astype(x, int8)
+#define as_uint8(x) __builtin_astype(x, uint8)
+#define as_long8(x) __builtin_astype(x, long8)
+#define as_ulong8(x) __builtin_astype(x, ulong8)
+
+#define as_char16(x) __builtin_astype(x, char16)
+#define as_uchar16(x) __builtin_astype(x, uchar16)
+#define as_short16(x) __builtin_astype(x, short16)
+#define as_ushort16(x) __builtin_astype(x, ushort16)
+#define as_int16(x) __builtin_astype(x, int16)
+#define as_uint16(x) __builtin_astype(x, uint16)
+#define as_long16(x) __builtin_astype(x, long16)
+#define as_ulong16(x) __builtin_astype(x, ulong16)
diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
new file mode 100644
index 0000000..983884b
--- /dev/null
+++ b/generic/include/clc/clc.h
@@ -0,0 +1,57 @@
+#ifndef cl_clang_storage_class_specifiers
+#error Implementation requires cl_clang_storage_class_specifiers extension!
+#endif
+
+#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+/* Function Attributes */
+#include <clc/clcfunc.h>
+
+/* Pattern Macro Definitions */
+#include <clc/clcmacro.h>
+
+/* 6.1 Supported Data Types */
+#include <clc/clctypes.h>
+
+/* 6.2.4.2 Reinterpreting Types Using as_type() and as_typen() */
+#include <clc/as_type.h>
+
+/* 6.11.1 Work-Item Functions */
+#include <clc/workitem/get_global_size.h>
+#include <clc/workitem/get_global_id.h>
+#include <clc/workitem/get_local_size.h>
+#include <clc/workitem/get_local_id.h>
+#include <clc/workitem/get_num_groups.h>
+#include <clc/workitem/get_group_id.h>
+
+/* 6.11.2 Math Functions */
+#include <clc/math/cos.h>
+#include <clc/math/sin.h>
+#include <clc/math/sqrt.h>
+#include <clc/math/native_cos.h>
+#include <clc/math/native_divide.h>
+#include <clc/math/native_sin.h>
+#include <clc/math/native_sqrt.h>
+
+/* 6.11.3 Integer Functions */
+#include <clc/integer/abs.h>
+#include <clc/integer/abs_diff.h>
+#include <clc/integer/add_sat.h>
+
+/* 6.11.5 Geometric Functions */
+#include <clc/geometric/cross.h>
+#include <clc/geometric/length.h>
+#include <clc/geometric/normalize.h>
+
+/* 6.11.6 Relational Functions */
+#include <clc/relational/select.h>
+
+/* 6.11.8 Synchronization Functions */
+#include <clc/synchronization/cl_mem_fence_flags.h>
+#include <clc/synchronization/barrier.h>
+
+#pragma OPENCL EXTENSION all : disable
diff --git a/generic/include/clc/clcfunc.h b/generic/include/clc/clcfunc.h
new file mode 100644
index 0000000..46067fc
--- /dev/null
+++ b/generic/include/clc/clcfunc.h
@@ -0,0 +1,4 @@
+#define _CLC_OVERLOAD __attribute__((overloadable)) /* lets several functions share one name, distinguished by argument types */
+#define _CLC_DECL /* expands to nothing; marks library function declarations */
+#define _CLC_DEF __attribute__((always_inline)) /* marks library function definitions; requests inlining at every call site */
+#define _CLC_INLINE __attribute__((always_inline)) static inline /* always-inline helper with internal linkage */
diff --git a/generic/include/clc/clcmacro.h b/generic/include/clc/clcmacro.h
new file mode 100644
index 0000000..d10a613
--- /dev/null
+++ b/generic/include/clc/clcmacro.h
@@ -0,0 +1,42 @@
+#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
+ return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
+ return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ }
+
+#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
+ return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
+ return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
+ FUNCTION(x.z, y.z)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ }
diff --git a/generic/include/clc/clctypes.h b/generic/include/clc/clctypes.h
new file mode 100644
index 0000000..ca729f7
--- /dev/null
+++ b/generic/include/clc/clctypes.h
@@ -0,0 +1,74 @@
+/* 6.1.1 Built-in Scalar Data Types */
+
+#include <stddef.h>
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+/* 6.1.2 Built-in Vector Data Types */
+
+typedef __attribute__((ext_vector_type(2))) char char2;
+typedef __attribute__((ext_vector_type(3))) char char3;
+typedef __attribute__((ext_vector_type(4))) char char4;
+typedef __attribute__((ext_vector_type(8))) char char8;
+typedef __attribute__((ext_vector_type(16))) char char16;
+
+typedef __attribute__((ext_vector_type(2))) uchar uchar2;
+typedef __attribute__((ext_vector_type(3))) uchar uchar3;
+typedef __attribute__((ext_vector_type(4))) uchar uchar4;
+typedef __attribute__((ext_vector_type(8))) uchar uchar8;
+typedef __attribute__((ext_vector_type(16))) uchar uchar16;
+
+typedef __attribute__((ext_vector_type(2))) short short2;
+typedef __attribute__((ext_vector_type(3))) short short3;
+typedef __attribute__((ext_vector_type(4))) short short4;
+typedef __attribute__((ext_vector_type(8))) short short8;
+typedef __attribute__((ext_vector_type(16))) short short16;
+
+typedef __attribute__((ext_vector_type(2))) ushort ushort2;
+typedef __attribute__((ext_vector_type(3))) ushort ushort3;
+typedef __attribute__((ext_vector_type(4))) ushort ushort4;
+typedef __attribute__((ext_vector_type(8))) ushort ushort8;
+typedef __attribute__((ext_vector_type(16))) ushort ushort16;
+
+typedef __attribute__((ext_vector_type(2))) int int2;
+typedef __attribute__((ext_vector_type(3))) int int3;
+typedef __attribute__((ext_vector_type(4))) int int4;
+typedef __attribute__((ext_vector_type(8))) int int8;
+typedef __attribute__((ext_vector_type(16))) int int16;
+
+typedef __attribute__((ext_vector_type(2))) uint uint2;
+typedef __attribute__((ext_vector_type(3))) uint uint3;
+typedef __attribute__((ext_vector_type(4))) uint uint4;
+typedef __attribute__((ext_vector_type(8))) uint uint8;
+typedef __attribute__((ext_vector_type(16))) uint uint16;
+
+typedef __attribute__((ext_vector_type(2))) long long2;
+typedef __attribute__((ext_vector_type(3))) long long3;
+typedef __attribute__((ext_vector_type(4))) long long4;
+typedef __attribute__((ext_vector_type(8))) long long8;
+typedef __attribute__((ext_vector_type(16))) long long16;
+
+typedef __attribute__((ext_vector_type(2))) ulong ulong2;
+typedef __attribute__((ext_vector_type(3))) ulong ulong3;
+typedef __attribute__((ext_vector_type(4))) ulong ulong4;
+typedef __attribute__((ext_vector_type(8))) ulong ulong8;
+typedef __attribute__((ext_vector_type(16))) ulong ulong16;
+
+typedef __attribute__((ext_vector_type(2))) float float2;
+typedef __attribute__((ext_vector_type(3))) float float3;
+typedef __attribute__((ext_vector_type(4))) float float4;
+typedef __attribute__((ext_vector_type(8))) float float8;
+typedef __attribute__((ext_vector_type(16))) float float16;
+
+/* 9.3 Double Precision Floating-Point */
+
+#ifdef cl_khr_fp64
+typedef __attribute__((ext_vector_type(2))) double double2;
+typedef __attribute__((ext_vector_type(3))) double double3;
+typedef __attribute__((ext_vector_type(4))) double double4;
+typedef __attribute__((ext_vector_type(8))) double double8;
+typedef __attribute__((ext_vector_type(16))) double double16;
+#endif
diff --git a/generic/include/clc/gentype.inc b/generic/include/clc/gentype.inc
new file mode 100644
index 0000000..4506920
--- /dev/null
+++ b/generic/include/clc/gentype.inc
@@ -0,0 +1,51 @@
+#define GENTYPE float
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE float2
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE float3
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE float4
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE float8
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE float16
+#include BODY
+#undef GENTYPE
+
+#ifdef cl_khr_fp64
+#define GENTYPE double
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE double2
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE double3
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE double4
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE double8
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE double16
+#include BODY
+#undef GENTYPE
+#endif
+
+#undef BODY
diff --git a/generic/include/clc/geometric/cross.h b/generic/include/clc/geometric/cross.h
new file mode 100644
index 0000000..74117c0
--- /dev/null
+++ b/generic/include/clc/geometric/cross.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL float3 cross(float3 p0, float3 p1);
+_CLC_OVERLOAD _CLC_DECL float4 cross(float4 p0, float4 p1);
diff --git a/generic/include/clc/geometric/distance.h b/generic/include/clc/geometric/distance.h
new file mode 100644
index 0000000..1660dcd
--- /dev/null
+++ b/generic/include/clc/geometric/distance.h
@@ -0,0 +1,2 @@
+#define BODY <clc/geometric/distance.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/generic/include/clc/geometric/dot.h b/generic/include/clc/geometric/dot.h
new file mode 100644
index 0000000..5f0464f
--- /dev/null
+++ b/generic/include/clc/geometric/dot.h
@@ -0,0 +1,2 @@
+#define BODY <clc/geometric/dot.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/generic/include/clc/geometric/floatn.inc b/generic/include/clc/geometric/floatn.inc
new file mode 100644
index 0000000..c77c464
--- /dev/null
+++ b/generic/include/clc/geometric/floatn.inc
@@ -0,0 +1,35 @@
+#define FLOATN float
+#include BODY
+#undef FLOATN
+
+#define FLOATN float2
+#include BODY
+#undef FLOATN
+
+#define FLOATN float3
+#include BODY
+#undef FLOATN
+
+#define FLOATN float4
+#include BODY
+#undef FLOATN
+
+#ifdef cl_khr_fp64
+#define FLOATN double
+#include BODY
+#undef FLOATN
+
+#define FLOATN double2
+#include BODY
+#undef FLOATN
+
+#define FLOATN double3
+#include BODY
+#undef FLOATN
+
+#define FLOATN double4
+#include BODY
+#undef FLOATN
+#endif
+
+#undef BODY
diff --git a/generic/include/clc/geometric/length.h b/generic/include/clc/geometric/length.h
new file mode 100644
index 0000000..fbba634
--- /dev/null
+++ b/generic/include/clc/geometric/length.h
@@ -0,0 +1,2 @@
+#define BODY <clc/geometric/length.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/generic/include/clc/geometric/length.inc b/generic/include/clc/geometric/length.inc
new file mode 100644
index 0000000..8ee8bf3
--- /dev/null
+++ b/generic/include/clc/geometric/length.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL float length(FLOATN p0); /* instantiated once per FLOATN by floatn.inc. NOTE(review): the return type stays float even for the double/doubleN variants generated under cl_khr_fp64 - confirm this is intended */
diff --git a/generic/include/clc/geometric/normalize.h b/generic/include/clc/geometric/normalize.h
new file mode 100644
index 0000000..3aaf61c
--- /dev/null
+++ b/generic/include/clc/geometric/normalize.h
@@ -0,0 +1,2 @@
+#define BODY <clc/geometric/normalize.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/generic/include/clc/geometric/normalize.inc b/generic/include/clc/geometric/normalize.inc
new file mode 100644
index 0000000..7b4f69d
--- /dev/null
+++ b/generic/include/clc/geometric/normalize.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL FLOATN normalize(FLOATN p);
diff --git a/generic/include/clc/integer/abs.h b/generic/include/clc/integer/abs.h
new file mode 100644
index 0000000..7592e4b
--- /dev/null
+++ b/generic/include/clc/integer/abs.h
@@ -0,0 +1,2 @@
+#define BODY <clc/integer/abs.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/include/clc/integer/abs.inc b/generic/include/clc/integer/abs.inc
new file mode 100644
index 0000000..bfbec20
--- /dev/null
+++ b/generic/include/clc/integer/abs.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL UGENTYPE abs(GENTYPE x);
diff --git a/generic/include/clc/integer/abs_diff.h b/generic/include/clc/integer/abs_diff.h
new file mode 100644
index 0000000..16fb465
--- /dev/null
+++ b/generic/include/clc/integer/abs_diff.h
@@ -0,0 +1,2 @@
+#define BODY <clc/integer/abs_diff.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/include/clc/integer/abs_diff.inc b/generic/include/clc/integer/abs_diff.inc
new file mode 100644
index 0000000..8cfdb9b
--- /dev/null
+++ b/generic/include/clc/integer/abs_diff.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL UGENTYPE abs_diff(GENTYPE x, GENTYPE y);
diff --git a/generic/include/clc/integer/add_sat.h b/generic/include/clc/integer/add_sat.h
new file mode 100644
index 0000000..9dbe12a
--- /dev/null
+++ b/generic/include/clc/integer/add_sat.h
@@ -0,0 +1,2 @@
+#define BODY <clc/integer/add_sat.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/include/clc/integer/add_sat.inc b/generic/include/clc/integer/add_sat.inc
new file mode 100644
index 0000000..2ea8a83
--- /dev/null
+++ b/generic/include/clc/integer/add_sat.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL GENTYPE add_sat(GENTYPE x, GENTYPE y);
diff --git a/generic/include/clc/integer/gentype.inc b/generic/include/clc/integer/gentype.inc
new file mode 100644
index 0000000..0b32efd
--- /dev/null
+++ b/generic/include/clc/integer/gentype.inc
@@ -0,0 +1,385 @@
+#define GENTYPE char
+#define UGENTYPE uchar
+#define SGENTYPE char
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE char2
+#define UGENTYPE uchar2
+#define SGENTYPE char2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE char3
+#define UGENTYPE uchar3
+#define SGENTYPE char3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE char4
+#define UGENTYPE uchar4
+#define SGENTYPE char4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE char8
+#define UGENTYPE uchar8
+#define SGENTYPE char8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE char16
+#define UGENTYPE uchar16
+#define SGENTYPE char16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar
+#define UGENTYPE uchar
+#define SGENTYPE char
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar2
+#define UGENTYPE uchar2
+#define SGENTYPE char2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar3
+#define UGENTYPE uchar3
+#define SGENTYPE char3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar4
+#define UGENTYPE uchar4
+#define SGENTYPE char4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar8
+#define UGENTYPE uchar8
+#define SGENTYPE char8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar16
+#define UGENTYPE uchar16
+#define SGENTYPE char16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short
+#define UGENTYPE ushort
+#define SGENTYPE short
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short2
+#define UGENTYPE ushort2
+#define SGENTYPE short2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short3
+#define UGENTYPE ushort3
+#define SGENTYPE short3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short4
+#define UGENTYPE ushort4
+#define SGENTYPE short4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short8
+#define UGENTYPE ushort8
+#define SGENTYPE short8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short16
+#define UGENTYPE ushort16
+#define SGENTYPE short16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort
+#define UGENTYPE ushort
+#define SGENTYPE short
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort2
+#define UGENTYPE ushort2
+#define SGENTYPE short2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort3
+#define UGENTYPE ushort3
+#define SGENTYPE short3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort4
+#define UGENTYPE ushort4
+#define SGENTYPE short4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort8
+#define UGENTYPE ushort8
+#define SGENTYPE short8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort16
+#define UGENTYPE ushort16
+#define SGENTYPE short16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int
+#define UGENTYPE uint
+#define SGENTYPE int
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int2
+#define UGENTYPE uint2
+#define SGENTYPE int2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int3
+#define UGENTYPE uint3
+#define SGENTYPE int3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int4
+#define UGENTYPE uint4
+#define SGENTYPE int4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int8
+#define UGENTYPE uint8
+#define SGENTYPE int8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int16
+#define UGENTYPE uint16
+#define SGENTYPE int16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint
+#define UGENTYPE uint
+#define SGENTYPE int
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint2
+#define UGENTYPE uint2
+#define SGENTYPE int2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint3
+#define UGENTYPE uint3
+#define SGENTYPE int3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint4
+#define UGENTYPE uint4
+#define SGENTYPE int4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint8
+#define UGENTYPE uint8
+#define SGENTYPE int8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint16
+#define UGENTYPE uint16
+#define SGENTYPE int16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long
+#define UGENTYPE ulong
+#define SGENTYPE long
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long2
+#define UGENTYPE ulong2
+#define SGENTYPE long2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long3
+#define UGENTYPE ulong3
+#define SGENTYPE long3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long4
+#define UGENTYPE ulong4
+#define SGENTYPE long4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long8
+#define UGENTYPE ulong8
+#define SGENTYPE long8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long16
+#define UGENTYPE ulong16
+#define SGENTYPE long16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong
+#define UGENTYPE ulong
+#define SGENTYPE long
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong2
+#define UGENTYPE ulong2
+#define SGENTYPE long2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong3
+#define UGENTYPE ulong3
+#define SGENTYPE long3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong4
+#define UGENTYPE ulong4
+#define SGENTYPE long4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong8
+#define UGENTYPE ulong8
+#define SGENTYPE long8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong16
+#define UGENTYPE ulong16
+#define SGENTYPE long16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#undef BODY
diff --git a/generic/include/clc/math/cos.h b/generic/include/clc/math/cos.h
new file mode 100644
index 0000000..e876c1a
--- /dev/null
+++ b/generic/include/clc/math/cos.h
@@ -0,0 +1,6 @@
+#undef cos
+#define cos __clc_cos
+
+#define FUNCTION __clc_cos
+#define INTRINSIC "llvm.cos"
+#include <clc/math/unary_intrin.inc>
diff --git a/generic/include/clc/math/native_cos.h b/generic/include/clc/math/native_cos.h
new file mode 100644
index 0000000..c7212cc
--- /dev/null
+++ b/generic/include/clc/math/native_cos.h
@@ -0,0 +1 @@
+#define native_cos cos
diff --git a/generic/include/clc/math/native_divide.h b/generic/include/clc/math/native_divide.h
new file mode 100644
index 0000000..5c52167
--- /dev/null
+++ b/generic/include/clc/math/native_divide.h
@@ -0,0 +1 @@
+#define native_divide(x, y) ((x) / (y))
diff --git a/generic/include/clc/math/native_sin.h b/generic/include/clc/math/native_sin.h
new file mode 100644
index 0000000..569a051
--- /dev/null
+++ b/generic/include/clc/math/native_sin.h
@@ -0,0 +1 @@
+#define native_sin sin
diff --git a/generic/include/clc/math/native_sqrt.h b/generic/include/clc/math/native_sqrt.h
new file mode 100644
index 0000000..a9525fc
--- /dev/null
+++ b/generic/include/clc/math/native_sqrt.h
@@ -0,0 +1 @@
+#define native_sqrt sqrt
diff --git a/generic/include/clc/math/sin.h b/generic/include/clc/math/sin.h
new file mode 100644
index 0000000..2216804
--- /dev/null
+++ b/generic/include/clc/math/sin.h
@@ -0,0 +1,6 @@
+#undef sin
+#define sin __clc_sin
+
+#define FUNCTION __clc_sin
+#define INTRINSIC "llvm.sin"
+#include <clc/math/unary_intrin.inc>
diff --git a/generic/include/clc/math/sqrt.h b/generic/include/clc/math/sqrt.h
new file mode 100644
index 0000000..a000e24
--- /dev/null
+++ b/generic/include/clc/math/sqrt.h
@@ -0,0 +1,6 @@
+#undef sqrt
+#define sqrt __clc_sqrt
+
+#define FUNCTION __clc_sqrt
+#define INTRINSIC "llvm.sqrt"
+#include <clc/math/unary_intrin.inc>
diff --git a/generic/include/clc/math/unary_decl.inc b/generic/include/clc/math/unary_decl.inc
new file mode 100644
index 0000000..392c4d6
--- /dev/null
+++ b/generic/include/clc/math/unary_decl.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL GENTYPE FUNCTION(GENTYPE x);
diff --git a/generic/include/clc/math/unary_intrin.inc b/generic/include/clc/math/unary_intrin.inc
new file mode 100644
index 0000000..2da5a9c
--- /dev/null
+++ b/generic/include/clc/math/unary_intrin.inc
@@ -0,0 +1,18 @@
+_CLC_OVERLOAD float FUNCTION(float f) __asm(INTRINSIC ".f32");
+_CLC_OVERLOAD float2 FUNCTION(float2 f) __asm(INTRINSIC ".v2f32");
+_CLC_OVERLOAD float3 FUNCTION(float3 f) __asm(INTRINSIC ".v3f32");
+_CLC_OVERLOAD float4 FUNCTION(float4 f) __asm(INTRINSIC ".v4f32");
+_CLC_OVERLOAD float8 FUNCTION(float8 f) __asm(INTRINSIC ".v8f32");
+_CLC_OVERLOAD float16 FUNCTION(float16 f) __asm(INTRINSIC ".v16f32");
+
+#ifdef cl_khr_fp64
+_CLC_OVERLOAD double FUNCTION(double d) __asm(INTRINSIC ".f64");
+_CLC_OVERLOAD double2 FUNCTION(double2 d) __asm(INTRINSIC ".v2f64");
+_CLC_OVERLOAD double3 FUNCTION(double3 d) __asm(INTRINSIC ".v3f64");
+_CLC_OVERLOAD double4 FUNCTION(double4 d) __asm(INTRINSIC ".v4f64");
+_CLC_OVERLOAD double8 FUNCTION(double8 d) __asm(INTRINSIC ".v8f64");
+_CLC_OVERLOAD double16 FUNCTION(double16 d) __asm(INTRINSIC ".v16f64");
+#endif
+
+#undef FUNCTION
+#undef INTRINSIC
diff --git a/generic/include/clc/relational/select.h b/generic/include/clc/relational/select.h
new file mode 100644
index 0000000..33a6909
--- /dev/null
+++ b/generic/include/clc/relational/select.h
@@ -0,0 +1 @@
+#define select(a, b, c) ((c) ? (b) : (a)) /* yields b when c is non-zero, otherwise a. NOTE(review): for vector c the per-element rule is whatever the compiler's ?: implements - confirm it matches OpenCL select() */
diff --git a/generic/include/clc/synchronization/cl_mem_fence_flags.h b/generic/include/clc/synchronization/cl_mem_fence_flags.h
new file mode 100644
index 0000000..c57eb42
--- /dev/null
+++ b/generic/include/clc/synchronization/cl_mem_fence_flags.h
@@ -0,0 +1,4 @@
+typedef uint cl_mem_fence_flags;
+
+#define CLK_LOCAL_MEM_FENCE 1
+#define CLK_GLOBAL_MEM_FENCE 2
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
new file mode 100644
index 0000000..11531fc
--- /dev/null
+++ b/generic/lib/SOURCES
@@ -0,0 +1,8 @@
+geometric/cross.cl
+geometric/dot.cl
+geometric/length.cl
+geometric/normalize.cl
+integer/abs.cl
+integer/add_sat.cl
+integer/add_sat.ll
+integer/add_sat_impl.ll
diff --git a/generic/lib/geometric/cross.cl b/generic/lib/geometric/cross.cl
new file mode 100644
index 0000000..4c1bc6f
--- /dev/null
+++ b/generic/lib/geometric/cross.cl
@@ -0,0 +1,11 @@
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) { /* 3-component cross product p0 x p1 */
+ return (float3)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, /* x and y components */
+ p0.x*p1.y - p0.y*p1.x); /* z component */
+}
+
+_CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) { /* cross product of the xyz parts; the w inputs are not read */
+ return (float4)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, /* x and y components */
+ p0.x*p1.y - p0.y*p1.x, 0.f); /* z component; result w is always 0 */
+}
diff --git a/generic/lib/geometric/dot.cl b/generic/lib/geometric/dot.cl
new file mode 100644
index 0000000..76cc1d2
--- /dev/null
+++ b/generic/lib/geometric/dot.cl
@@ -0,0 +1,17 @@
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
+ return p0*p1;
+}
+
+_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
+ return p0.x*p1.x + p0.y*p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+}
diff --git a/generic/lib/geometric/length.cl b/generic/lib/geometric/length.cl
new file mode 100644
index 0000000..957fbfd
--- /dev/null
+++ b/generic/lib/geometric/length.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY "length.inc"
+#include <clc/geometric/floatn.inc>
diff --git a/generic/lib/geometric/length.inc b/generic/lib/geometric/length.inc
new file mode 100644
index 0000000..66d1604
--- /dev/null
+++ b/generic/lib/geometric/length.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF float length(FLOATN p) { /* length of p: square root of p dotted with itself */
+ return native_sqrt(dot(p, p)); /* native_sqrt is #defined to sqrt in clc/math/native_sqrt.h */
+}
diff --git a/generic/lib/geometric/normalize.cl b/generic/lib/geometric/normalize.cl
new file mode 100644
index 0000000..95d327c
--- /dev/null
+++ b/generic/lib/geometric/normalize.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY "normalize.inc"
+#include <clc/geometric/floatn.inc>
diff --git a/generic/lib/geometric/normalize.inc b/generic/lib/geometric/normalize.inc
new file mode 100644
index 0000000..a23908b
--- /dev/null
+++ b/generic/lib/geometric/normalize.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF FLOATN normalize(FLOATN p) { /* returns p divided by its own length */
+ return p/length(p); /* no guard against length(p) == 0 */
+}
diff --git a/generic/lib/integer/abs.cl b/generic/lib/integer/abs.cl
new file mode 100644
index 0000000..86f1a34
--- /dev/null
+++ b/generic/lib/integer/abs.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY <abs.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/lib/integer/abs.inc b/generic/lib/integer/abs.inc
new file mode 100644
index 0000000..fff6691
--- /dev/null
+++ b/generic/lib/integer/abs.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF UGENTYPE abs(GENTYPE x) { /* absolute value of x, returned as the unsigned counterpart type UGENTYPE */
+ return __builtin_astype((GENTYPE)(x > (GENTYPE)(0) ? x : -x), UGENTYPE); /* keep positive x, negate otherwise, then reinterpret the bits as UGENTYPE */
+}
diff --git a/generic/lib/integer/abs_diff.cl b/generic/lib/integer/abs_diff.cl
new file mode 100644
index 0000000..c9ca821
--- /dev/null
+++ b/generic/lib/integer/abs_diff.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY <abs_diff.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/lib/integer/abs_diff.inc b/generic/lib/integer/abs_diff.inc
new file mode 100644
index 0000000..93efdba
--- /dev/null
+++ b/generic/lib/integer/abs_diff.inc
@@ -0,0 +1,8 @@
+// Absolute difference of x and y, returned as the unsigned counterpart type.
+// Subtracts the smaller operand from the larger one and reinterprets the
+// bits of the result as UGENTYPE.
+// GENTYPE and UGENTYPE are not defined in this file; the including file must
+// provide them.
+_CLC_OVERLOAD _CLC_DEF UGENTYPE abs_diff(GENTYPE x, GENTYPE y) {
+  return __builtin_astype((GENTYPE)(x > y ? x-y : y-x), UGENTYPE);
+}
diff --git a/generic/lib/integer/add_sat.cl b/generic/lib/integer/add_sat.cl
new file mode 100644
index 0000000..aae2e7f
--- /dev/null
+++ b/generic/lib/integer/add_sat.cl
@@ -0,0 +1,58 @@
+#include <clc/clc.h>
+
+// Scalar saturating-add helpers defined in add_sat.ll. Each prototype returns
+// the same type as its operands, matching the width of the integer returned
+// by the corresponding IR definition (i8, i16, i32 or i64).
+_CLC_DECL char __clc_add_sat_s8(char, char);
+_CLC_DECL uchar __clc_add_sat_u8(uchar, uchar);
+_CLC_DECL short __clc_add_sat_s16(short, short);
+_CLC_DECL ushort __clc_add_sat_u16(ushort, ushort);
+_CLC_DECL int __clc_add_sat_s32(int, int);
+_CLC_DECL uint __clc_add_sat_u32(uint, uint);
+_CLC_DECL long __clc_add_sat_s64(long, long);
+_CLC_DECL ulong __clc_add_sat_u64(ulong, ulong);
+
+// Scalar add_sat overloads. Each one forwards both operands to the helper of
+// matching width and signedness and returns its result.
+_CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) {
+  return __clc_add_sat_s8(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF uchar add_sat(uchar x, uchar y) {
+  return __clc_add_sat_u8(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF short add_sat(short x, short y) {
+  return __clc_add_sat_s16(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF ushort add_sat(ushort x, ushort y) {
+  return __clc_add_sat_u16(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF int add_sat(int x, int y) {
+  return __clc_add_sat_s32(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF uint add_sat(uint x, uint y) {
+  return __clc_add_sat_u32(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF long add_sat(long x, long y) {
+  return __clc_add_sat_s64(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF ulong add_sat(ulong x, ulong y) {
+  return __clc_add_sat_u64(x, y);
+}
+
+// One _CLC_BINARY_VECTORIZE instantiation per element type; presumably this
+// produces the vector add_sat overloads (TODO confirm against clcmacro.h).
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, add_sat, char, char)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, add_sat, uchar, uchar)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, add_sat, short, short)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, add_sat, ushort, ushort)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, add_sat, int, int)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, add_sat, uint, uint)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, add_sat, long, long)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, add_sat, ulong, ulong)
diff --git a/generic/lib/integer/add_sat.ll b/generic/lib/integer/add_sat.ll
new file mode 100644
index 0000000..d6814c3
--- /dev/null
+++ b/generic/lib/integer/add_sat.ll
@@ -0,0 +1,52 @@
+; Entry points __clc_add_sat_{s,u}{8,16,32,64}. Every wrapper below forwards
+; its two operands to the like-named __clc_add_sat_impl_* function, which is
+; only declared in this file, and returns that function's result unchanged.
+
+declare i8 @__clc_add_sat_impl_s8(i8, i8)
+declare i8 @__clc_add_sat_impl_u8(i8, i8)
+declare i16 @__clc_add_sat_impl_s16(i16, i16)
+declare i16 @__clc_add_sat_impl_u16(i16, i16)
+declare i32 @__clc_add_sat_impl_s32(i32, i32)
+declare i32 @__clc_add_sat_impl_u32(i32, i32)
+declare i64 @__clc_add_sat_impl_s64(i64, i64)
+declare i64 @__clc_add_sat_impl_u64(i64, i64)
+
+define linkonce_odr i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+  %result = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
+  ret i8 %result
+}
+
+define linkonce_odr i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+  %result = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
+  ret i8 %result
+}
+
+define linkonce_odr i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+  %result = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
+  ret i16 %result
+}
+
+define linkonce_odr i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+  %result = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
+  ret i16 %result
+}
+
+define linkonce_odr i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+  %result = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
+  ret i32 %result
+}
+
+define linkonce_odr i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+  %result = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
+  ret i32 %result
+}
+
+define linkonce_odr i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+  %result = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
+  ret i64 %result
+}
+
+define linkonce_odr i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+  %result = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
+  ret i64 %result
+}
diff --git a/generic/lib/integer/add_sat_impl.ll b/generic/lib/integer/add_sat_impl.ll
new file mode 100644
index 0000000..92f4c53
--- /dev/null
+++ b/generic/lib/integer/add_sat_impl.ll
@@ -0,0 +1,91 @@
+; Saturating addition built on the llvm.{s,u}add.with.overflow intrinsics.
+;
+; Signed variants: when the intrinsic reports overflow, the result is replaced
+; by a limit derived from x. An arithmetic shift right by (width - 1) gives 0
+; for non-negative x and -1 for negative x; xor-ing that with the largest
+; positive value yields the largest positive value or the most negative value
+; respectively.
+;
+; Unsigned variants: when the intrinsic reports overflow, the result is
+; replaced by -1, i.e. all bits set.
+
+declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
+declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16)
+declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16)
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
+declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64)
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64)
+
+define linkonce_odr i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+  %pair = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
+  %sum = extractvalue {i8, i1} %pair, 0
+  %overflowed = extractvalue {i8, i1} %pair, 1
+  %sign = ashr i8 %x, 7
+  %bound = xor i8 %sign, 127
+  %clamped = select i1 %overflowed, i8 %bound, i8 %sum
+  ret i8 %clamped
+}
+
+define linkonce_odr i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+  %pair = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
+  %sum = extractvalue {i8, i1} %pair, 0
+  %overflowed = extractvalue {i8, i1} %pair, 1
+  %clamped = select i1 %overflowed, i8 -1, i8 %sum
+  ret i8 %clamped
+}
+
+define linkonce_odr i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+  %pair = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y)
+  %sum = extractvalue {i16, i1} %pair, 0
+  %overflowed = extractvalue {i16, i1} %pair, 1
+  %sign = ashr i16 %x, 15
+  %bound = xor i16 %sign, 32767
+  %clamped = select i1 %overflowed, i16 %bound, i16 %sum
+  ret i16 %clamped
+}
+
+define linkonce_odr i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+  %pair = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y)
+  %sum = extractvalue {i16, i1} %pair, 0
+  %overflowed = extractvalue {i16, i1} %pair, 1
+  %clamped = select i1 %overflowed, i16 -1, i16 %sum
+  ret i16 %clamped
+}
+
+define linkonce_odr i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+  %pair = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+  %sum = extractvalue {i32, i1} %pair, 0
+  %overflowed = extractvalue {i32, i1} %pair, 1
+  %sign = ashr i32 %x, 31
+  %bound = xor i32 %sign, 2147483647
+  %clamped = select i1 %overflowed, i32 %bound, i32 %sum
+  ret i32 %clamped
+}
+
+define linkonce_odr i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+  %pair = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+  %sum = extractvalue {i32, i1} %pair, 0
+  %overflowed = extractvalue {i32, i1} %pair, 1
+  %clamped = select i1 %overflowed, i32 -1, i32 %sum
+  ret i32 %clamped
+}
+
+define linkonce_odr i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+  %pair = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y)
+  %sum = extractvalue {i64, i1} %pair, 0
+  %overflowed = extractvalue {i64, i1} %pair, 1
+  %sign = ashr i64 %x, 63
+  %bound = xor i64 %sign, 9223372036854775807
+  %clamped = select i1 %overflowed, i64 %bound, i64 %sum
+  ret i64 %clamped
+}
+
+define linkonce_odr i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
+  %sum = extractvalue {i64, i1} %pair, 0
+  %overflowed = extractvalue {i64, i1} %pair, 1
+  %clamped = select i1 %overflowed, i64 -1, i64 %sum
+  ret i64 %clamped
+}