summaryrefslogtreecommitdiff
path: root/generic/lib
diff options
context:
space:
mode:
authorAaron Watry <awatry@gmail.com>2013-03-23 12:04:06 -0500
committerTom Stellard <thomas.stellard@amd.com>2013-04-08 07:12:55 -0700
commite2f2f9498455eb3130b6f4b4732574771553b7e1 (patch)
tree849ffb13d719112681b3791566c42b10527b7fb6 /generic/lib
parent2ae3fd1f635b3a23d5bb49ac3551fd0bc848bac0 (diff)
libclc: implement rotate builtin
This implementation does a lot of bit shifting and masking. Suffice it to say, this is somewhat suboptimal... but it does appear to produce correct results (after the piglit tests were corrected for sign extension issues). Someone who knows LLVM better than I do could rewrite this more efficiently.
Diffstat (limited to 'generic/lib')
-rw-r--r--generic/lib/SOURCES1
-rw-r--r--generic/lib/integer/rotate.cl4
-rw-r--r--generic/lib/integer/rotate.inc35
3 files changed, 40 insertions, 0 deletions
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index f639c83..495b3e7 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -8,6 +8,7 @@ integer/abs_diff.cl
integer/add_sat.cl
integer/add_sat.ll
integer/add_sat_impl.ll
+integer/rotate.cl
integer/sub_sat.cl
integer/sub_sat.ll
integer/sub_sat_impl.ll
diff --git a/generic/lib/integer/rotate.cl b/generic/lib/integer/rotate.cl
new file mode 100644
index 0000000..d7eff2b
--- /dev/null
+++ b/generic/lib/integer/rotate.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY <rotate.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/lib/integer/rotate.inc b/generic/lib/integer/rotate.inc
new file mode 100644
index 0000000..e83dd51
--- /dev/null
+++ b/generic/lib/integer/rotate.inc
@@ -0,0 +1,35 @@
+/**
+ * Rotate x by n bits using only shifts and masks (positive n is handled as a
+ * right-rotate by GENSIZE - n, negative n as a right-rotate by -n).
+ * Not necessarily optimal, but it produces correct results (at least for int).
+ * If we're lucky, LLVM will recognize the pattern and emit rotate instructions:
+ * http://llvm.1065342.n5.nabble.com/rotate-td47679.html
+ * Eventually, someone should feel free to implement an LLVM-specific version.
+ */
+
+_CLC_OVERLOAD _CLC_DEF GENTYPE rotate(GENTYPE x, GENTYPE n){
+ //Reduce n modulo the bit width to avoid extra work if someone's spinning
+ //the value through multiple full rotations
+ n = n % (GENTYPE)GENSIZE;
+
+ //Convert the request into an equivalent rotate-right amount, since the
+ //shift-and-mask algorithm below is written as a rotate right
+ //e.g. a rotate of int by 5 bits becomes rotate right by 27 bits (32 - 5)
+ // and a rotate of int by -4 bits becomes rotate right by 4
+ GENTYPE amt = (n > (GENTYPE)0 ? (GENTYPE)GENSIZE - n : (GENTYPE)0 - n );
+
+ //Extract the low amt bits of x; these are the bits that will wrap around
+ GENTYPE mask = ( (GENTYPE)1 << amt ) - (GENTYPE)1;
+ GENTYPE wrapped_bits = x & mask;
+
+ //Shift the input value right and then AND a mask that keeps only the low
+ //(GENSIZE - amt) bits, eliminating sign-extension interference
+ //if the rotate amount is 0, just use ~0 for a mask
+ GENTYPE se_mask = !amt ? ~((GENTYPE)0) :
+ ( ( (GENTYPE)1 << ((GENTYPE)GENSIZE - amt) ) - (GENTYPE)1 );
+ GENTYPE unwrapped_bits = x >> amt;
+ unwrapped_bits &= se_mask;
+
+ //Finally OR the shifted value with the wrapped bits moved up to the top
+ return unwrapped_bits | (wrapped_bits << ( (GENTYPE)GENSIZE - amt ) );
+} \ No newline at end of file