summaryrefslogtreecommitdiff
path: root/generic/lib
diff options
context:
space:
mode:
Diffstat (limited to 'generic/lib')
-rw-r--r-- generic/lib/SOURCES 1
-rw-r--r-- generic/lib/integer/rotate.cl 4
-rw-r--r-- generic/lib/integer/rotate.inc 35
3 files changed, 40 insertions, 0 deletions
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index f639c83..495b3e7 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -8,6 +8,7 @@ integer/abs_diff.cl
integer/add_sat.cl
integer/add_sat.ll
integer/add_sat_impl.ll
+integer/rotate.cl
integer/sub_sat.cl
integer/sub_sat.ll
integer/sub_sat_impl.ll
diff --git a/generic/lib/integer/rotate.cl b/generic/lib/integer/rotate.cl
new file mode 100644
index 0000000..d7eff2b
--- /dev/null
+++ b/generic/lib/integer/rotate.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY <rotate.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/lib/integer/rotate.inc b/generic/lib/integer/rotate.inc
new file mode 100644
index 0000000..e83dd51
--- /dev/null
+++ b/generic/lib/integer/rotate.inc
@@ -0,0 +1,35 @@
+/**
+ * rotate(x, n): bit-rotate x by n. As coded below, a positive n is turned into
+ * a rotate right by (GENSIZE - n), i.e. a rotate left by n, and a negative n
+ * into a rotate right by -n. Not necessarily optimal, and only claimed to be
+ * correct for int. If we're lucky, LLVM will recognize the pattern and emit
+ * rotate instructions: http://llvm.1065342.n5.nabble.com/rotate-td47679.html
+ * Eventually, someone should feel free to implement an llvm-specific version
+ */
+
+_CLC_OVERLOAD _CLC_DEF GENTYPE rotate(GENTYPE x, GENTYPE n){
+ //Reduce n modulo the type width so that multiple full rotations cost no
+ //extra work. NOTE(review): the sign test below assumes % keeps n's sign
+ n = n % (GENTYPE)GENSIZE;
+
+ //Turn n into a rotate-right amount, since the algorithm below is a rotate
+ //right (per component, presumably, when GENTYPE is a vector type - confirm)
+ //e.g. a rotate of int by 5 bits becomes rotate right by 27 bits (32 - 5)
+ // and a rotate of int by -4 bits becomes rotate right by 4
+ GENTYPE amt = (n > (GENTYPE)0 ? (GENTYPE)GENSIZE - n : (GENTYPE)0 - n );
+
+ //Low amt bits of x: these fall off the right end and must wrap to the top
+ GENTYPE mask = ( (GENTYPE)1 << amt ) - (GENTYPE)1;
+ GENTYPE wrapped_bits = x & mask;
+
+ //Shift x right, then keep only the low (GENSIZE - amt) bits so that any
+ //sign bits copied in by the shift cannot pollute the wrapped bits
+ //if the rotate amount is 0 nothing is shifted, so the mask is ~0 (keep all)
+ GENTYPE se_mask = !amt ? ~((GENTYPE)0) :
+ ( ( (GENTYPE)1 << ((GENTYPE)GENSIZE - amt) ) - (GENTYPE)1 );
+ GENTYPE unwrapped_bits = x >> amt;
+ unwrapped_bits &= se_mask;
+
+ //Finally move the wrapped bits to the top and OR them in (when amt is 0, wrapped_bits is 0)
+ return unwrapped_bits | (wrapped_bits << ( (GENTYPE)GENSIZE - amt ) );
+} \ No newline at end of file