From a5680175111af8b293dd9cd07eb8f465dda7d86b Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 1 Jun 2012 17:29:59 +0000 Subject: configure.py: Add an install rule. git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@157821 91177308-0d34-0410-b5e6-96231b3b80d8 --- build/metabuild.py | 6 +++++- configure.py | 37 ++++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/build/metabuild.py b/build/metabuild.py index 65870ac..c675e09 100644 --- a/build/metabuild.py +++ b/build/metabuild.py @@ -9,6 +9,7 @@ class Make(object): self.rules = {} self.rule_text = '' self.all_targets = [] + self.default_targets = [] self.clean_files = [] self.distclean_files = [] self.output.write("""all:: @@ -68,8 +69,11 @@ endif return input return [input] + def default(self, paths): + self.default_targets += self._as_list(paths) + def finish(self): - self.output.write('all:: %s\n\n' % ' '.join(self.all_targets)) + self.output.write('all:: %s\n\n' % ' '.join(self.default_targets or self.all_targets)) self.output.write('clean: \n\trm -f %s\n\n' % ' '.join(self.clean_files)) self.output.write('distclean: clean\n\trm -f %s\n' % ' '.join(self.distclean_files)) diff --git a/configure.py b/configure.py index ab84a0d..546bbd3 100755 --- a/configure.py +++ b/configure.py @@ -17,6 +17,8 @@ import metabuild p = OptionParser() p.add_option('--with-llvm-config', metavar='PATH', help='use given llvm-config script') +p.add_option('--prefix', metavar='PATH', + help='install to given prefix') p.add_option('-g', metavar='GENERATOR', default='make', help='use given generator (default: make)') (options, args) = p.parse_args() @@ -70,6 +72,9 @@ b.rule("PREPARE_BUILTINS", "%s -o $out $in" % prepare_builtins, manifest_deps = set([sys.argv[0], os.path.join(srcdir, 'build', 'metabuild.py'), os.path.join(srcdir, 'build', 'ninja_syntax.py')]) +install_files = [] +install_deps = [] + for target in targets: (t_arch, t_vendor, t_os) = target.split('-') archs = [t_arch] @@ -83,12 +88,13 @@ for target in targets: subdirs.append("%s-%s" % (arch, t_os)) subdirs.append(arch) - subdirs = [subdir for subdir in subdirs - if os.path.isdir(os.path.join(srcdir, subdir, 'include')) or - os.path.isfile(os.path.join(srcdir, subdir, 'lib', 'SOURCES'))] + incdirs = filter(os.path.isdir, + [os.path.join(srcdir, subdir, 'include') for subdir in subdirs]) + libdirs = filter(lambda d: os.path.isfile(os.path.join(d, 'SOURCES')), + [os.path.join(srcdir, subdir, 'lib') for subdir in subdirs]) - clang_cl_includes = ' '.join(["-I%s" % os.path.join(srcdir, subdir, 'include') - for subdir in subdirs]) + clang_cl_includes = ' '.join(["-I%s" % incdir for incdir in incdirs]) + install_files += [(incdir, incdir[len(srcdir)+1:]) for incdir in incdirs] # The rule for building a .bc file for the specified architecture using clang. clang_bc_flags = "-ccc-host-triple %s -I`dirname $in` %s " \ @@ -101,11 +107,8 @@ for target in targets: objects = [] sources_seen = set() - for subdir in subdirs: - src_libdir = os.path.join(srcdir, subdir, 'lib') - if not os.path.isdir(src_libdir): - continue - subdir_list_file = os.path.join(src_libdir, 'SOURCES') + for libdir in libdirs: + subdir_list_file = os.path.join(libdir, 'SOURCES') manifest_deps.add(subdir_list_file) for src in open(subdir_list_file).readlines(): src = src.rstrip() @@ -113,7 +116,7 @@ for target in targets: sources_seen.add(src) obj = os.path.join(target, 'lib', src + '.bc') objects.append(obj) - src_file = os.path.join(src_libdir, src) + src_file = os.path.join(libdir, src) ext = os.path.splitext(src)[1] if ext == '.ll': b.build(obj, 'LLVM_AS', src_file) @@ -126,6 +129,18 @@ for target in targets: b.build(builtins_link_bc, "LLVM_LINK", objects) b.build(builtins_opt_bc, "OPT", builtins_link_bc) b.build(builtins_bc, "PREPARE_BUILTINS", builtins_opt_bc, prepare_builtins) + install_files.append((builtins_bc, builtins_bc)) + install_deps.append(builtins_bc) + b.default(builtins_bc) + +if options.prefix: + install_cmd = ' && '.join(['mkdir -p %(dst)s && cp -r %(src)s %(dst)s' % + {'src': file, + 'dst': os.path.join(options.prefix, + os.path.dirname(dest))} + for (file, dest) in install_files]) + b.rule('install', command = install_cmd, description = 'INSTALL') + b.build('install', 'install', install_deps) b.rule("configure", command = ' '.join(sys.argv), description = 'CONFIGURE', generator = True) -- cgit v1.2.3 From 05edc47f68b94fb1fc6a6e6f2a5baabb2abd9b59 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sun, 5 Aug 2012 22:24:36 +0000 Subject: Fix declarations of __clc_add_sat_*. Patch by Lei Mou! git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@161311 91177308-0d34-0410-b5e6-96231b3b80d8 --- generic/lib/integer/add_sat.cl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/generic/lib/integer/add_sat.cl b/generic/lib/integer/add_sat.cl index aae2e7f..7eb4e39 100644 --- a/generic/lib/integer/add_sat.cl +++ b/generic/lib/integer/add_sat.cl @@ -1,14 +1,14 @@ #include // From add_sat.ll -_CLC_DECL char __clc_add_sat_s8(char, char); -_CLC_DECL char __clc_add_sat_u8(uchar, uchar); -_CLC_DECL char __clc_add_sat_s16(short, short); -_CLC_DECL char __clc_add_sat_u16(ushort, ushort); -_CLC_DECL char __clc_add_sat_s32(int, int); -_CLC_DECL char __clc_add_sat_u32(uint, uint); -_CLC_DECL char __clc_add_sat_s64(long, long); -_CLC_DECL char __clc_add_sat_u64(ulong, ulong); +_CLC_DECL char __clc_add_sat_s8(char, char); +_CLC_DECL uchar __clc_add_sat_u8(uchar, uchar); +_CLC_DECL short __clc_add_sat_s16(short, short); +_CLC_DECL ushort __clc_add_sat_u16(ushort, ushort); +_CLC_DECL int __clc_add_sat_s32(int, int); +_CLC_DECL uint __clc_add_sat_u32(uint, uint); +_CLC_DECL long __clc_add_sat_s64(long, long); +_CLC_DECL ulong __clc_add_sat_u64(ulong, ulong); _CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) { return __clc_add_sat_s8(x, y); -- cgit v1.2.3 From 3b230ffdb7c8c312e327d18f23e3d91ec8ae0511 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sun, 5 Aug 2012 22:25:12 +0000 Subject: Implement sub_sat builtin. Patch by Lei Mou! git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@161312 91177308-0d34-0410-b5e6-96231b3b80d8 --- generic/include/clc/clc.h | 1 + generic/include/clc/integer/sub_sat.h | 2 + generic/include/clc/integer/sub_sat.inc | 1 + generic/lib/SOURCES | 3 ++ generic/lib/integer/sub_sat.cl | 52 +++++++++++++++++++++ generic/lib/integer/sub_sat.ll | 55 ++++++++++++++++++++++ generic/lib/integer/sub_sat_impl.ll | 83 +++++++++++++++++++++++++++++++++ ptx/lib/SOURCES | 1 + ptx/lib/integer/sub_sat.ll | 55 ++++++++++++++++++++++ test/subsat.cl | 19 ++++++++ 10 files changed, 272 insertions(+) create mode 100644 generic/include/clc/integer/sub_sat.h create mode 100644 generic/include/clc/integer/sub_sat.inc create mode 100644 generic/lib/integer/sub_sat.cl create mode 100644 generic/lib/integer/sub_sat.ll create mode 100644 generic/lib/integer/sub_sat_impl.ll create mode 100644 ptx/lib/integer/sub_sat.ll create mode 100644 test/subsat.cl diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h index b0cbd4a..8b41523 100644 --- a/generic/include/clc/clc.h +++ b/generic/include/clc/clc.h @@ -57,6 +57,7 @@ #include #include #include +#include /* 6.11.5 Geometric Functions */ #include diff --git a/generic/include/clc/integer/sub_sat.h b/generic/include/clc/integer/sub_sat.h new file mode 100644 index 0000000..942274d --- /dev/null +++ b/generic/include/clc/integer/sub_sat.h @@ -0,0 +1,2 @@ +#define BODY +#include diff --git a/generic/include/clc/integer/sub_sat.inc b/generic/include/clc/integer/sub_sat.inc new file mode 100644 index 0000000..3e0f8f9 --- /dev/null +++ b/generic/include/clc/integer/sub_sat.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL GENTYPE sub_sat(GENTYPE x, GENTYPE y); diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES index 0608116..344c865 100644 --- a/generic/lib/SOURCES +++ b/generic/lib/SOURCES @@ -7,5 +7,8 @@ integer/abs.cl integer/add_sat.cl integer/add_sat.ll integer/add_sat_impl.ll +integer/sub_sat.cl +integer/sub_sat.ll +integer/sub_sat_impl.ll math/hypot.cl math/mad.cl diff --git a/generic/lib/integer/sub_sat.cl b/generic/lib/integer/sub_sat.cl new file mode 100644 index 0000000..9555b6d --- /dev/null +++ b/generic/lib/integer/sub_sat.cl @@ -0,0 +1,52 @@ +#include + +// From sub_sat.ll +_CLC_DECL char __clc_sub_sat_s8(char, char); +_CLC_DECL uchar __clc_sub_sat_u8(uchar, uchar); +_CLC_DECL short __clc_sub_sat_s16(short, short); +_CLC_DECL ushort __clc_sub_sat_u16(ushort, ushort); +_CLC_DECL int __clc_sub_sat_s32(int, int); +_CLC_DECL uint __clc_sub_sat_u32(uint, uint); +_CLC_DECL long __clc_sub_sat_s64(long, long); +_CLC_DECL ulong __clc_sub_sat_u64(ulong, ulong); + +_CLC_OVERLOAD _CLC_DEF char sub_sat(char x, char y) { + return __clc_sub_sat_s8(x, y); +} + +_CLC_OVERLOAD _CLC_DEF uchar sub_sat(uchar x, uchar y) { + return __clc_sub_sat_u8(x, y); +} + +_CLC_OVERLOAD _CLC_DEF short sub_sat(short x, short y) { + return __clc_sub_sat_s16(x, y); +} + +_CLC_OVERLOAD _CLC_DEF ushort sub_sat(ushort x, ushort y) { + return __clc_sub_sat_u16(x, y); +} + +_CLC_OVERLOAD _CLC_DEF int sub_sat(int x, int y) { + return __clc_sub_sat_s32(x, y); +} + +_CLC_OVERLOAD _CLC_DEF uint sub_sat(uint x, uint y) { + return __clc_sub_sat_u32(x, y); +} + +_CLC_OVERLOAD _CLC_DEF long sub_sat(long x, long y) { + return __clc_sub_sat_s64(x, y); +} + +_CLC_OVERLOAD _CLC_DEF ulong sub_sat(ulong x, ulong y) { + return __clc_sub_sat_u64(x, y); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, sub_sat, char, char) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, sub_sat, uchar, uchar) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, sub_sat, short, short) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, sub_sat, ushort, ushort) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, sub_sat, int, int) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, sub_sat, uint, uint) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, sub_sat, long, long) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, sub_sat, ulong, ulong) diff --git a/generic/lib/integer/sub_sat.ll b/generic/lib/integer/sub_sat.ll new file mode 100644 index 0000000..f223a0e --- /dev/null +++ b/generic/lib/integer/sub_sat.ll @@ -0,0 +1,55 @@ +declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + +define linkonce_odr i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + ret i8 %call +} + +declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + +define linkonce_odr i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + ret i8 %call +} + +declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + +define linkonce_odr i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + ret i16 %call +} + +declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + +define linkonce_odr i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + ret i16 %call +} + +declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + +define linkonce_odr i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + ret i32 %call +} + +declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + +define linkonce_odr i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + ret i32 %call +} + +declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + +define linkonce_odr i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + ret i64 %call +} + +declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + +define linkonce_odr i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + ret i64 %call +} diff --git a/generic/lib/integer/sub_sat_impl.ll b/generic/lib/integer/sub_sat_impl.ll new file mode 100644 index 0000000..99abbc3 --- /dev/null +++ b/generic/lib/integer/sub_sat_impl.ll @@ -0,0 +1,83 @@ +declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8) +declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8) + +define linkonce_odr i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) + %res = extractvalue {i8, i1} %call, 0 + %over = extractvalue {i8, i1} %call, 1 + %x.msb = ashr i8 %x, 7 + %x.limit = xor i8 %x.msb, 127 + %sat = select i1 %over, i8 %x.limit, i8 %res + ret i8 %sat +} + +define linkonce_odr i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %x, i8 %y) + %res = extractvalue {i8, i1} %call, 0 + %over = extractvalue {i8, i1} %call, 1 + %sat = select i1 %over, i8 0, i8 %res + ret i8 %sat +} + +declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16) +declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16) + +define linkonce_odr i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %x, i16 %y) + %res = extractvalue {i16, i1} %call, 0 + %over = extractvalue {i16, i1} %call, 1 + %x.msb = ashr i16 %x, 15 + %x.limit = xor i16 %x.msb, 32767 + %sat = select i1 %over, i16 %x.limit, i16 %res + ret i16 %sat +} + +define linkonce_odr i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %x, i16 %y) + %res = extractvalue {i16, i1} %call, 0 + %over = extractvalue {i16, i1} %call, 1 + %sat = select i1 %over, i16 0, i16 %res + ret i16 %sat +} + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) + +define linkonce_odr i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %x, i32 %y) + %res = extractvalue {i32, i1} %call, 0 + %over = extractvalue {i32, i1} %call, 1 + %x.msb = ashr i32 %x, 31 + %x.limit = xor i32 %x.msb, 2147483647 + %sat = select i1 %over, i32 %x.limit, i32 %res + ret i32 %sat +} + +define linkonce_odr i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y) + %res = extractvalue {i32, i1} %call, 0 + %over = extractvalue {i32, i1} %call, 1 + %sat = select i1 %over, i32 0, i32 %res + ret i32 %sat +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) + +define linkonce_odr i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %x, i64 %y) + %res = extractvalue {i64, i1} %call, 0 + %over = extractvalue {i64, i1} %call, 1 + %x.msb = ashr i64 %x, 63 + %x.limit = xor i64 %x.msb, 9223372036854775807 + %sat = select i1 %over, i64 %x.limit, i64 %res + ret i64 %sat +} + +define linkonce_odr i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %x, i64 %y) + %res = extractvalue {i64, i1} %call, 0 + %over = extractvalue {i64, i1} %call, 1 + %sat = select i1 %over, i64 0, i64 %res + ret i64 %sat +} diff --git a/ptx/lib/SOURCES b/ptx/lib/SOURCES index aab8e3f..fb6e17f 100644 --- a/ptx/lib/SOURCES +++ b/ptx/lib/SOURCES @@ -1 +1,2 @@ integer/add_sat.ll +integer/sub_sat.ll \ No newline at end of file diff --git a/ptx/lib/integer/sub_sat.ll b/ptx/lib/integer/sub_sat.ll new file mode 100644 index 0000000..6a51a52 --- /dev/null +++ b/ptx/lib/integer/sub_sat.ll @@ -0,0 +1,55 @@ +declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + +define linkonce_odr ptx_device i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) + ret i8 %call +} + +declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + +define linkonce_odr ptx_device i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) + ret i8 %call +} + +declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + +define linkonce_odr ptx_device i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) + ret i16 %call +} + +declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + +define linkonce_odr ptx_device i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) + ret i16 %call +} + +declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + +define linkonce_odr ptx_device i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) + ret i32 %call +} + +declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + +define linkonce_odr ptx_device i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) + ret i32 %call +} + +declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + +define linkonce_odr ptx_device i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) + ret i64 %call +} + +declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + +define linkonce_odr ptx_device i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) + ret i64 %call +} diff --git a/test/subsat.cl b/test/subsat.cl new file mode 100644 index 0000000..a83414b --- /dev/null +++ b/test/subsat.cl @@ -0,0 +1,19 @@ +__kernel void test_subsat_char(char *a, char x, char y) { + *a = sub_sat(x, y); + return; +} + +__kernel void test_subsat_uchar(uchar *a, uchar x, uchar y) { + *a = sub_sat(x, y); + return; +} + +__kernel void test_subsat_long(long *a, long x, long y) { + *a = sub_sat(x, y); + return; +} + +__kernel void test_subsat_ulong(ulong *a, ulong x, ulong y) { + *a = sub_sat(x, y); + return; +} \ No newline at end of file -- cgit v1.2.3 From a37e2c06b60184ad139cf01f6b37ef2276febf34 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sun, 5 Aug 2012 22:25:37 +0000 Subject: PTX: move implementations of work-item and synchronisation functions to lib, and add header files in generic. Incorporates a patch by Tom Stellard! git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@161313 91177308-0d34-0410-b5e6-96231b3b80d8 --- generic/include/clc/synchronization/barrier.h | 1 + generic/include/clc/workitem/get_global_id.h | 1 + generic/include/clc/workitem/get_global_size.h | 1 + generic/include/clc/workitem/get_group_id.h | 1 + generic/include/clc/workitem/get_local_id.h | 1 + generic/include/clc/workitem/get_local_size.h | 1 + generic/include/clc/workitem/get_num_groups.h | 1 + generic/lib/SOURCES | 2 ++ generic/lib/workitem/get_global_id.cl | 5 +++++ generic/lib/workitem/get_global_size.cl | 5 +++++ ptx-nvidiacl/include/clc/synchronization/barrier.h | 6 ------ ptx-nvidiacl/include/clc/workitem/get_global_id.h | 8 -------- ptx-nvidiacl/include/clc/workitem/get_global_size.h | 8 -------- ptx-nvidiacl/include/clc/workitem/get_group_id.h | 8 -------- ptx-nvidiacl/include/clc/workitem/get_local_id.h | 8 -------- ptx-nvidiacl/include/clc/workitem/get_local_size.h | 8 -------- ptx-nvidiacl/include/clc/workitem/get_num_groups.h | 8 -------- ptx-nvidiacl/lib/SOURCES | 4 ++++ ptx-nvidiacl/lib/synchronization/barrier.cl | 8 ++++++++ ptx-nvidiacl/lib/workitem/get_group_id.cl | 10 ++++++++++ ptx-nvidiacl/lib/workitem/get_local_id.cl | 10 ++++++++++ ptx-nvidiacl/lib/workitem/get_local_size.cl | 10 ++++++++++ ptx-nvidiacl/lib/workitem/get_num_groups.cl | 10 ++++++++++ 23 files changed, 71 insertions(+), 54 deletions(-) create mode 100644 generic/include/clc/synchronization/barrier.h create mode 100644 generic/include/clc/workitem/get_global_id.h create mode 100644 generic/include/clc/workitem/get_global_size.h create mode 100644 generic/include/clc/workitem/get_group_id.h create mode 100644 generic/include/clc/workitem/get_local_id.h create mode 100644 generic/include/clc/workitem/get_local_size.h create mode 100644 generic/include/clc/workitem/get_num_groups.h create mode 100644 generic/lib/workitem/get_global_id.cl create mode 100644 generic/lib/workitem/get_global_size.cl delete mode 100644 ptx-nvidiacl/include/clc/synchronization/barrier.h delete mode 100644 ptx-nvidiacl/include/clc/workitem/get_global_id.h delete mode 100644 ptx-nvidiacl/include/clc/workitem/get_global_size.h delete mode 100644 ptx-nvidiacl/include/clc/workitem/get_group_id.h delete mode 100644 ptx-nvidiacl/include/clc/workitem/get_local_id.h delete mode 100644 ptx-nvidiacl/include/clc/workitem/get_local_size.h delete mode 100644 ptx-nvidiacl/include/clc/workitem/get_num_groups.h create mode 100644 ptx-nvidiacl/lib/synchronization/barrier.cl create mode 100644 ptx-nvidiacl/lib/workitem/get_group_id.cl create mode 100644 ptx-nvidiacl/lib/workitem/get_local_id.cl create mode 100644 ptx-nvidiacl/lib/workitem/get_local_size.cl create mode 100644 ptx-nvidiacl/lib/workitem/get_num_groups.cl diff --git a/generic/include/clc/synchronization/barrier.h b/generic/include/clc/synchronization/barrier.h new file mode 100644 index 0000000..7167a3d --- /dev/null +++ b/generic/include/clc/synchronization/barrier.h @@ -0,0 +1 @@ +_CLC_DECL void barrier(cl_mem_fence_flags flags); diff --git a/generic/include/clc/workitem/get_global_id.h b/generic/include/clc/workitem/get_global_id.h new file mode 100644 index 0000000..92759f1 --- /dev/null +++ b/generic/include/clc/workitem/get_global_id.h @@ -0,0 +1 @@ +_CLC_DECL size_t get_global_id(uint dim); diff --git a/generic/include/clc/workitem/get_global_size.h b/generic/include/clc/workitem/get_global_size.h new file mode 100644 index 0000000..2f83705 --- /dev/null +++ b/generic/include/clc/workitem/get_global_size.h @@ -0,0 +1 @@ +_CLC_DECL size_t get_global_size(uint dim); diff --git a/generic/include/clc/workitem/get_group_id.h b/generic/include/clc/workitem/get_group_id.h new file mode 100644 index 0000000..346c82c --- /dev/null +++ b/generic/include/clc/workitem/get_group_id.h @@ -0,0 +1 @@ +_CLC_DECL size_t get_group_id(uint dim); diff --git a/generic/include/clc/workitem/get_local_id.h b/generic/include/clc/workitem/get_local_id.h new file mode 100644 index 0000000..169aeed --- /dev/null +++ b/generic/include/clc/workitem/get_local_id.h @@ -0,0 +1 @@ +_CLC_DECL size_t get_local_id(uint dim); diff --git a/generic/include/clc/workitem/get_local_size.h b/generic/include/clc/workitem/get_local_size.h new file mode 100644 index 0000000..040ec58 --- /dev/null +++ b/generic/include/clc/workitem/get_local_size.h @@ -0,0 +1 @@ +_CLC_DECL size_t get_local_size(uint dim); diff --git a/generic/include/clc/workitem/get_num_groups.h b/generic/include/clc/workitem/get_num_groups.h new file mode 100644 index 0000000..e555c7e --- /dev/null +++ b/generic/include/clc/workitem/get_num_groups.h @@ -0,0 +1 @@ +_CLC_DECL size_t get_num_groups(uint dim); diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES index 344c865..1d56c40 100644 --- a/generic/lib/SOURCES +++ b/generic/lib/SOURCES @@ -12,3 +12,5 @@ integer/sub_sat.ll integer/sub_sat_impl.ll math/hypot.cl math/mad.cl +workitem/get_global_id.cl +workitem/get_global_size.cl diff --git a/generic/lib/workitem/get_global_id.cl b/generic/lib/workitem/get_global_id.cl new file mode 100644 index 0000000..fdd83d2 --- /dev/null +++ b/generic/lib/workitem/get_global_id.cl @@ -0,0 +1,5 @@ +#include + +_CLC_DEF size_t get_global_id(uint dim) { + return get_group_id(dim)*get_local_size(dim) + get_local_id(dim); +} diff --git a/generic/lib/workitem/get_global_size.cl b/generic/lib/workitem/get_global_size.cl new file mode 100644 index 0000000..5ae649e --- /dev/null +++ b/generic/lib/workitem/get_global_size.cl @@ -0,0 +1,5 @@ +#include + +_CLC_DEF size_t get_global_size(uint dim) { + return get_num_groups(dim)*get_local_size(dim); +} diff --git a/ptx-nvidiacl/include/clc/synchronization/barrier.h b/ptx-nvidiacl/include/clc/synchronization/barrier.h deleted file mode 100644 index cd9f327..0000000 --- a/ptx-nvidiacl/include/clc/synchronization/barrier.h +++ /dev/null @@ -1,6 +0,0 @@ -_CLC_INLINE void barrier(cl_mem_fence_flags flags) { - if (flags & CLK_LOCAL_MEM_FENCE) { - __builtin_ptx_bar_sync(0); - } -} - diff --git a/ptx-nvidiacl/include/clc/workitem/get_global_id.h b/ptx-nvidiacl/include/clc/workitem/get_global_id.h deleted file mode 100644 index 026d2fe..0000000 --- a/ptx-nvidiacl/include/clc/workitem/get_global_id.h +++ /dev/null @@ -1,8 +0,0 @@ -_CLC_INLINE size_t get_global_id(uint dim) { - switch (dim) { - case 0: return __builtin_ptx_read_ctaid_x()*__builtin_ptx_read_ntid_x()+__builtin_ptx_read_tid_x(); - case 1: return __builtin_ptx_read_ctaid_y()*__builtin_ptx_read_ntid_y()+__builtin_ptx_read_tid_y(); - case 2: return __builtin_ptx_read_ctaid_z()*__builtin_ptx_read_ntid_z()+__builtin_ptx_read_tid_z(); - default: return 0; - } -} diff --git a/ptx-nvidiacl/include/clc/workitem/get_global_size.h b/ptx-nvidiacl/include/clc/workitem/get_global_size.h deleted file mode 100644 index 5cd4222..0000000 --- a/ptx-nvidiacl/include/clc/workitem/get_global_size.h +++ /dev/null @@ -1,8 +0,0 @@ -_CLC_INLINE size_t get_global_size(uint dim) { - switch (dim) { - case 0: return __builtin_ptx_read_nctaid_x()*__builtin_ptx_read_ntid_x(); - case 1: return __builtin_ptx_read_nctaid_y()*__builtin_ptx_read_ntid_y(); - case 2: return __builtin_ptx_read_nctaid_z()*__builtin_ptx_read_ntid_z(); - default: return 0; - } -} diff --git a/ptx-nvidiacl/include/clc/workitem/get_group_id.h b/ptx-nvidiacl/include/clc/workitem/get_group_id.h deleted file mode 100644 index 18b1bd4..0000000 --- a/ptx-nvidiacl/include/clc/workitem/get_group_id.h +++ /dev/null @@ -1,8 +0,0 @@ -_CLC_INLINE size_t get_group_id(uint dim) { - switch (dim) { - case 0: return __builtin_ptx_read_ctaid_x(); - case 1: return __builtin_ptx_read_ctaid_y(); - case 2: return __builtin_ptx_read_ctaid_z(); - default: return 0; - } -} diff --git a/ptx-nvidiacl/include/clc/workitem/get_local_id.h b/ptx-nvidiacl/include/clc/workitem/get_local_id.h deleted file mode 100644 index 1b8c776..0000000 --- a/ptx-nvidiacl/include/clc/workitem/get_local_id.h +++ /dev/null @@ -1,8 +0,0 @@ -_CLC_INLINE size_t get_local_id(uint dim) { - switch (dim) { - case 0: return __builtin_ptx_read_tid_x(); - case 1: return __builtin_ptx_read_tid_y(); - case 2: return __builtin_ptx_read_tid_z(); - default: return 0; - } -} diff --git a/ptx-nvidiacl/include/clc/workitem/get_local_size.h b/ptx-nvidiacl/include/clc/workitem/get_local_size.h deleted file mode 100644 index cbc1f6e..0000000 --- a/ptx-nvidiacl/include/clc/workitem/get_local_size.h +++ /dev/null @@ -1,8 +0,0 @@ -_CLC_INLINE size_t get_local_size(uint dim) { - switch (dim) { - case 0: return __builtin_ptx_read_ntid_x(); - case 1: return __builtin_ptx_read_ntid_y(); - case 2: return __builtin_ptx_read_ntid_z(); - default: return 0; - } -} diff --git a/ptx-nvidiacl/include/clc/workitem/get_num_groups.h b/ptx-nvidiacl/include/clc/workitem/get_num_groups.h deleted file mode 100644 index 36ee849..0000000 --- a/ptx-nvidiacl/include/clc/workitem/get_num_groups.h +++ /dev/null @@ -1,8 +0,0 @@ -_CLC_INLINE size_t get_num_groups(uint dim) { - switch (dim) { - case 0: return __builtin_ptx_read_nctaid_x(); - case 1: return __builtin_ptx_read_nctaid_y(); - case 2: return __builtin_ptx_read_nctaid_z(); - default: return 0; - } -} diff --git a/ptx-nvidiacl/lib/SOURCES b/ptx-nvidiacl/lib/SOURCES index e69de29..1a96a1a 100644 --- a/ptx-nvidiacl/lib/SOURCES +++ b/ptx-nvidiacl/lib/SOURCES @@ -0,0 +1,4 @@ +workitem/get_group_id.cl +workitem/get_local_id.cl +workitem/get_local_size.cl +workitem/get_num_groups.cl diff --git a/ptx-nvidiacl/lib/synchronization/barrier.cl b/ptx-nvidiacl/lib/synchronization/barrier.cl new file mode 100644 index 0000000..fb36c26 --- /dev/null +++ b/ptx-nvidiacl/lib/synchronization/barrier.cl @@ -0,0 +1,8 @@ +#include + +_CLC_DEF void barrier(cl_mem_fence_flags flags) { + if (flags & CLK_LOCAL_MEM_FENCE) { + __builtin_ptx_bar_sync(0); + } +} + diff --git a/ptx-nvidiacl/lib/workitem/get_group_id.cl b/ptx-nvidiacl/lib/workitem/get_group_id.cl new file mode 100644 index 0000000..2b35b4e --- /dev/null +++ b/ptx-nvidiacl/lib/workitem/get_group_id.cl @@ -0,0 +1,10 @@ +#include + +_CLC_DEF size_t get_group_id(uint dim) { + switch (dim) { + case 0: return __builtin_ptx_read_ctaid_x(); + case 1: return __builtin_ptx_read_ctaid_y(); + case 2: return __builtin_ptx_read_ctaid_z(); + default: return 0; + } +} diff --git a/ptx-nvidiacl/lib/workitem/get_local_id.cl b/ptx-nvidiacl/lib/workitem/get_local_id.cl new file mode 100644 index 0000000..f0cfdc0 --- /dev/null +++ b/ptx-nvidiacl/lib/workitem/get_local_id.cl @@ -0,0 +1,10 @@ +#include + +_CLC_DEF size_t get_local_id(uint dim) { + switch (dim) { + case 0: return __builtin_ptx_read_tid_x(); + case 1: return __builtin_ptx_read_tid_y(); + case 2: return __builtin_ptx_read_tid_z(); + default: return 0; + } +} diff --git a/ptx-nvidiacl/lib/workitem/get_local_size.cl b/ptx-nvidiacl/lib/workitem/get_local_size.cl new file mode 100644 index 0000000..c3f5425 --- /dev/null +++ b/ptx-nvidiacl/lib/workitem/get_local_size.cl @@ -0,0 +1,10 @@ +#include + +_CLC_DEF size_t get_local_size(uint dim) { + switch (dim) { + case 0: return __builtin_ptx_read_ntid_x(); + case 1: return __builtin_ptx_read_ntid_y(); + case 2: return __builtin_ptx_read_ntid_z(); + default: return 0; + } +} diff --git a/ptx-nvidiacl/lib/workitem/get_num_groups.cl b/ptx-nvidiacl/lib/workitem/get_num_groups.cl new file mode 100644 index 0000000..90bdc2e --- /dev/null +++ b/ptx-nvidiacl/lib/workitem/get_num_groups.cl @@ -0,0 +1,10 @@ +#include + +_CLC_DEF size_t get_num_groups(uint dim) { + switch (dim) { + case 0: return __builtin_ptx_read_nctaid_x(); + case 1: return __builtin_ptx_read_nctaid_y(); + case 2: return __builtin_ptx_read_nctaid_z(); + default: return 0; + } +} -- cgit v1.2.3 From 833b5f1a04b3137b403a01835d1685576a24bac5 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sun, 5 Aug 2012 22:25:48 +0000 Subject: Do not use linkonce_odr linkage in .ll files. This prevented them from being linked into the library under lazy linkage. git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@161314 91177308-0d34-0410-b5e6-96231b3b80d8 --- generic/lib/integer/add_sat.ll | 16 ++++++++-------- generic/lib/integer/add_sat_impl.ll | 16 ++++++++-------- generic/lib/integer/sub_sat.ll | 16 ++++++++-------- generic/lib/integer/sub_sat_impl.ll | 16 ++++++++-------- ptx/lib/integer/add_sat.ll | 16 ++++++++-------- ptx/lib/integer/sub_sat.ll | 16 ++++++++-------- 6 files changed, 48 insertions(+), 48 deletions(-) diff --git a/generic/lib/integer/add_sat.ll b/generic/lib/integer/add_sat.ll index d6814c3..bcbe4c0 100644 --- a/generic/lib/integer/add_sat.ll +++ b/generic/lib/integer/add_sat.ll @@ -1,55 +1,55 @@ declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) -define linkonce_odr i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) ret i8 %call } declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) -define linkonce_odr i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) ret i8 %call } declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) -define linkonce_odr i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) ret i16 %call } declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) -define linkonce_odr i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) ret i16 %call } declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) -define linkonce_odr i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) ret i32 %call } declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) -define linkonce_odr i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) ret i32 %call } declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) -define linkonce_odr i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) ret i64 %call } declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) -define linkonce_odr i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) ret i64 %call } diff --git a/generic/lib/integer/add_sat_impl.ll b/generic/lib/integer/add_sat_impl.ll index 92f4c53..c150ecb 100644 --- a/generic/lib/integer/add_sat_impl.ll +++ b/generic/lib/integer/add_sat_impl.ll @@ -1,7 +1,7 @@ declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8) -define linkonce_odr i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) %res = extractvalue {i8, i1} %call, 0 %over = extractvalue {i8, i1} %call, 1 @@ -11,7 +11,7 @@ define linkonce_odr i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone al ret i8 %sat } -define linkonce_odr i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) %res = extractvalue {i8, i1} %call, 0 %over = extractvalue {i8, i1} %call, 1 @@ -22,7 +22,7 @@ define linkonce_odr i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone al declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) -define linkonce_odr i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y) %res = extractvalue {i16, i1} %call, 0 %over = extractvalue {i16, i1} %call, 1 @@ -32,7 +32,7 @@ define linkonce_odr i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnon ret i16 %sat } -define linkonce_odr i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y) %res = extractvalue {i16, i1} %call, 0 %over = extractvalue {i16, i1} %call, 1 @@ -43,7 +43,7 @@ define linkonce_odr i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnon declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) -define linkonce_odr i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y) %res = extractvalue {i32, i1} %call, 0 %over = extractvalue {i32, i1} %call, 1 @@ -53,7 +53,7 @@ define linkonce_odr i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnon ret i32 %sat } -define linkonce_odr i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) %res = extractvalue {i32, i1} %call, 0 %over = extractvalue {i32, i1} %call, 1 @@ -64,7 +64,7 @@ define linkonce_odr i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnon declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) -define linkonce_odr i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y) %res = extractvalue {i64, i1} %call, 0 %over = extractvalue {i64, i1} %call, 1 @@ -74,7 +74,7 @@ define linkonce_odr i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnon ret i64 %sat } -define linkonce_odr i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y) %res = extractvalue {i64, i1} %call, 0 %over = extractvalue {i64, i1} %call, 1 diff --git a/generic/lib/integer/sub_sat.ll b/generic/lib/integer/sub_sat.ll index f223a0e..7252574 100644 --- a/generic/lib/integer/sub_sat.ll +++ b/generic/lib/integer/sub_sat.ll @@ -1,55 +1,55 @@ declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) -define linkonce_odr i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) ret i8 %call } declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) -define linkonce_odr i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) ret i8 %call } declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) -define linkonce_odr i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) ret i16 %call } declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) -define linkonce_odr i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) ret i16 %call } declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) -define linkonce_odr i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) ret i32 %call } declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) -define linkonce_odr i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) ret i32 %call } declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) -define linkonce_odr i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) ret i64 %call } declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) -define linkonce_odr i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) ret i64 %call } diff --git a/generic/lib/integer/sub_sat_impl.ll b/generic/lib/integer/sub_sat_impl.ll index 99abbc3..e82b632 100644 --- a/generic/lib/integer/sub_sat_impl.ll +++ b/generic/lib/integer/sub_sat_impl.ll @@ -1,7 +1,7 @@ declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8) declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8) -define linkonce_odr i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) %res = extractvalue {i8, i1} %call, 0 %over = extractvalue {i8, i1} %call, 1 @@ -11,7 +11,7 @@ define linkonce_odr i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) nounwind readnone al ret i8 %sat } -define linkonce_odr i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %x, i8 %y) %res = extractvalue {i8, i1} %call, 0 %over = extractvalue {i8, i1} %call, 1 @@ -22,7 +22,7 @@ define linkonce_odr i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) nounwind readnone al declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16) declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16) -define linkonce_odr i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %x, i16 %y) %res = extractvalue {i16, i1} %call, 0 %over = extractvalue {i16, i1} %call, 1 @@ -32,7 +32,7 @@ define linkonce_odr i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) nounwind readnon ret i16 %sat } -define linkonce_odr i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %x, i16 %y) %res = extractvalue {i16, i1} %call, 0 %over = extractvalue {i16, i1} %call, 1 @@ -43,7 +43,7 @@ define linkonce_odr i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) nounwind readnon declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) -define linkonce_odr i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %x, i32 %y) %res = extractvalue {i32, i1} %call, 0 %over = extractvalue {i32, i1} %call, 1 @@ -53,7 +53,7 @@ define linkonce_odr i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) nounwind readnon ret i32 %sat } -define linkonce_odr i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y) %res = extractvalue {i32, i1} %call, 0 %over = extractvalue {i32, i1} %call, 1 @@ -64,7 +64,7 @@ define linkonce_odr i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) nounwind readnon declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) -define linkonce_odr i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %x, i64 %y) %res = extractvalue {i64, i1} %call, 0 %over = extractvalue {i64, i1} %call, 1 @@ -74,7 +74,7 @@ define linkonce_odr i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) nounwind readnon ret i64 %sat } -define linkonce_odr i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %x, i64 %y) %res = extractvalue {i64, i1} %call, 0 %over = extractvalue {i64, i1} %call, 1 diff --git a/ptx/lib/integer/add_sat.ll b/ptx/lib/integer/add_sat.ll index 9b8311c..f887962 100644 --- a/ptx/lib/integer/add_sat.ll +++ b/ptx/lib/integer/add_sat.ll @@ -1,55 +1,55 @@ declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) -define linkonce_odr ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) ret i8 %call } declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) -define linkonce_odr ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) ret i8 %call } declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) -define linkonce_odr ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) ret i16 %call } declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) -define linkonce_odr ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) ret i16 %call } declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) -define linkonce_odr ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) ret i32 %call } declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) -define linkonce_odr ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) ret i32 %call } declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) -define linkonce_odr ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) ret i64 %call } declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) -define linkonce_odr ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) ret i64 %call } diff --git a/ptx/lib/integer/sub_sat.ll b/ptx/lib/integer/sub_sat.ll index 6a51a52..1a66eb5 100644 --- a/ptx/lib/integer/sub_sat.ll +++ b/ptx/lib/integer/sub_sat.ll @@ -1,55 +1,55 @@ declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) -define linkonce_odr ptx_device i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define ptx_device i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) ret i8 %call } declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) -define linkonce_odr ptx_device i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { +define ptx_device i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) ret i8 %call } declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) -define linkonce_odr ptx_device i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define ptx_device i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) ret i16 %call } declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) -define linkonce_odr ptx_device i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { +define ptx_device i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) ret i16 %call } declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) -define linkonce_odr ptx_device i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define ptx_device i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) ret i32 %call } declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) -define linkonce_odr ptx_device i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { +define ptx_device i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) ret i32 %call } declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) -define linkonce_odr ptx_device i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define ptx_device i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) ret i64 %call } declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) -define linkonce_odr ptx_device i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { +define ptx_device i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) ret i64 %call } -- cgit v1.2.3 From 1e8ef6c06fd23e593f67fb5b8e4bb89eb657e8d6 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 21 Aug 2012 10:48:21 +0000 Subject: Add floor builtin. Patch by Cassie Epps! git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@162273 91177308-0d34-0410-b5e6-96231b3b80d8 --- generic/include/clc/clc.h | 1 + generic/include/clc/math/floor.h | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 generic/include/clc/math/floor.h diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h index 8b41523..4f49760 100644 --- a/generic/include/clc/clc.h +++ b/generic/include/clc/clc.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/generic/include/clc/math/floor.h b/generic/include/clc/math/floor.h new file mode 100644 index 0000000..abb7c2a --- /dev/null +++ b/generic/include/clc/math/floor.h @@ -0,0 +1,6 @@ +#undef floor +#define floor __clc_floor + +#define FUNCTION __clc_floor +#define INTRINSIC "llvm.floor" +#include -- cgit v1.2.3 From a9652818973fd27ac904b60dabe12d75ab192ff9 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 21 Aug 2012 10:48:35 +0000 Subject: Add rsqrt builtin. Based on patch by Cassie Epps! git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@162274 91177308-0d34-0410-b5e6-96231b3b80d8 --- generic/include/clc/clc.h | 1 + generic/include/clc/math/rsqrt.h | 1 + test/rsqrt.cl | 6 ++++++ 3 files changed, 8 insertions(+) create mode 100644 generic/include/clc/math/rsqrt.h create mode 100644 test/rsqrt.cl diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h index 4f49760..565b505 100644 --- a/generic/include/clc/clc.h +++ b/generic/include/clc/clc.h @@ -53,6 +53,7 @@ #include #include #include +#include /* 6.11.3 Integer Functions */ #include diff --git a/generic/include/clc/math/rsqrt.h b/generic/include/clc/math/rsqrt.h new file mode 100644 index 0000000..8fd2cbf --- /dev/null +++ b/generic/include/clc/math/rsqrt.h @@ -0,0 +1 @@ +#define rsqrt(x) (1.f/sqrt(x)) diff --git a/test/rsqrt.cl b/test/rsqrt.cl new file mode 100644 index 0000000..13ad216 --- /dev/null +++ b/test/rsqrt.cl @@ -0,0 +1,6 @@ +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +__kernel void foo(float4 *x, double4 *y) { + x[1] = rsqrt(x[0]); + y[1] = rsqrt(y[0]); +} -- cgit v1.2.3 From 3290bdd44b38ca1528e7ac7da953a739289700a9 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 5 Sep 2012 18:13:55 +0000 Subject: Add barrier.cl to SOURCES, spotted by Jin Wang. git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@163227 91177308-0d34-0410-b5e6-96231b3b80d8 --- ptx-nvidiacl/lib/SOURCES | 1 + 1 file changed, 1 insertion(+) diff --git a/ptx-nvidiacl/lib/SOURCES b/ptx-nvidiacl/lib/SOURCES index 1a96a1a..7cdbd85 100644 --- a/ptx-nvidiacl/lib/SOURCES +++ b/ptx-nvidiacl/lib/SOURCES @@ -1,3 +1,4 @@ +synchronization/barrier.cl workitem/get_group_id.cl workitem/get_local_id.cl workitem/get_local_size.cl -- cgit v1.2.3 From 5c2bfa665589a07301860bb2da3912ce5a8323db Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 8 Oct 2012 03:39:05 +0000 Subject: Add native_powr builtin. Patch by Tom Stellard! git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@165385 91177308-0d34-0410-b5e6-96231b3b80d8 --- generic/include/clc/clc.h | 1 + generic/include/clc/math/native_powr.h | 1 + 2 files changed, 2 insertions(+) create mode 100644 generic/include/clc/math/native_powr.h diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h index 565b505..fc41f76 100644 --- a/generic/include/clc/clc.h +++ b/generic/include/clc/clc.h @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/generic/include/clc/math/native_powr.h b/generic/include/clc/math/native_powr.h new file mode 100644 index 0000000..e8a37d9 --- /dev/null +++ b/generic/include/clc/math/native_powr.h @@ -0,0 +1 @@ +#define native_powr pow -- cgit v1.2.3 From 14e5a4f2116dd512d3bd6489d55760aa487cc3dd Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 8 Oct 2012 03:39:21 +0000 Subject: Implement any() builtin. Patch by Tom Stellard! git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@165386 91177308-0d34-0410-b5e6-96231b3b80d8 --- generic/include/clc/clc.h | 1 + generic/include/clc/relational/any.h | 16 ++++++++++++++++ generic/lib/SOURCES | 1 + generic/lib/relational/any.cl | 30 ++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+) create mode 100644 generic/include/clc/relational/any.h create mode 100644 generic/lib/relational/any.cl diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h index fc41f76..315693b 100644 --- a/generic/include/clc/clc.h +++ b/generic/include/clc/clc.h @@ -69,6 +69,7 @@ #include /* 6.11.6 Relational Functions */ +#include #include /* 6.11.8 Synchronization Functions */ diff --git a/generic/include/clc/relational/any.h b/generic/include/clc/relational/any.h new file mode 100644 index 0000000..4687ed2 --- /dev/null +++ b/generic/include/clc/relational/any.h @@ -0,0 +1,16 @@ + +#define _CLC_ANY_DECL(TYPE) \ + _CLC_OVERLOAD _CLC_DECL int any(TYPE v); + +#define _CLC_VECTOR_ANY_DECL(TYPE) \ + _CLC_ANY_DECL(TYPE) \ + _CLC_ANY_DECL(TYPE##2) \ + _CLC_ANY_DECL(TYPE##3) \ + _CLC_ANY_DECL(TYPE##4) \ + _CLC_ANY_DECL(TYPE##8) \ + _CLC_ANY_DECL(TYPE##16) + +_CLC_VECTOR_ANY_DECL(char) +_CLC_VECTOR_ANY_DECL(short) +_CLC_VECTOR_ANY_DECL(int) +_CLC_VECTOR_ANY_DECL(long) diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES index 1d56c40..d29ca1f 100644 --- a/generic/lib/SOURCES +++ b/generic/lib/SOURCES @@ -12,5 +12,6 @@ integer/sub_sat.ll integer/sub_sat_impl.ll math/hypot.cl math/mad.cl +relational/any.cl workitem/get_global_id.cl workitem/get_global_size.cl diff --git a/generic/lib/relational/any.cl b/generic/lib/relational/any.cl new file mode 100644 index 0000000..4d37210 --- /dev/null +++ b/generic/lib/relational/any.cl @@ -0,0 +1,30 @@ +#include + +#define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) +#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1)) +#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2)) +#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3)) +#define _CLC_ANY8(v) (_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) \ + | _CLC_ANY((v).s6) | _CLC_ANY((v).s7)) +#define _CLC_ANY16(v) (_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) \ + | _CLC_ANY((v).sA) | _CLC_ANY((v).sB) \ + | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) \ + | _CLC_ANY((v).sE) | _CLC_ANY((v).sf)) + + +#define ANY_ID(TYPE) \ + _CLC_OVERLOAD _CLC_DEF int any(TYPE v) + +#define ANY_VECTORIZE(TYPE) \ + ANY_ID(TYPE) { return _CLC_ANY(v); } \ + ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \ + ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \ + ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \ + ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \ + ANY_ID(TYPE##16) { return _CLC_ANY16(v); } + +ANY_VECTORIZE(char) +ANY_VECTORIZE(short) +ANY_VECTORIZE(int) +ANY_VECTORIZE(long) + -- cgit v1.2.3