diff options
Diffstat (limited to 'generic/lib')
-rw-r--r-- | generic/lib/SOURCES | 1 | ||||
-rw-r--r-- | generic/lib/integer/rotate.cl | 4 | ||||
-rw-r--r-- | generic/lib/integer/rotate.inc | 35 |
3 files changed, 40 insertions, 0 deletions
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index f639c83..495b3e7 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -8,6 +8,7 @@ integer/abs_diff.cl
 integer/add_sat.cl
 integer/add_sat.ll
 integer/add_sat_impl.ll
+integer/rotate.cl
 integer/sub_sat.cl
 integer/sub_sat.ll
 integer/sub_sat_impl.ll
diff --git a/generic/lib/integer/rotate.cl b/generic/lib/integer/rotate.cl
new file mode 100644
index 0000000..d7eff2b
--- /dev/null
+++ b/generic/lib/integer/rotate.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY <rotate.inc>
+#include <clc/integer/gentype.inc>
diff --git a/generic/lib/integer/rotate.inc b/generic/lib/integer/rotate.inc
new file mode 100644
index 0000000..e83dd51
--- /dev/null
+++ b/generic/lib/integer/rotate.inc
@@ -0,0 +1,35 @@
+/**
+ * rotate(x, n): rotate the bits of each component of x to the left by the
+ * corresponding component of n. Bits shifted off the left side come back in
+ * on the right.
+ *
+ * The count is reduced modulo GENSIZE (the bit width of one component), so
+ * for signed types a negative n behaves as a rotate right by -n bits, e.g.
+ * rotating an int by -4 is the same as rotating it left by 28.
+ *
+ * Only shifts and bitwise operations are applied to x, so nothing here
+ * depends on signed arithmetic overflow when GENTYPE is a signed type.
+ *
+ * If we're lucky, LLVM will recognize the pattern and produce rotate
+ * instructions:
+ * http://llvm.1065342.n5.nabble.com/rotate-td47679.html
+ *
+ * Eventually, someone should feel free to implement an llvm-specific version
+ */
+
+_CLC_OVERLOAD _CLC_DEF GENTYPE rotate(GENTYPE x, GENTYPE n){
+  //Assumes GENSIZE is a power of two (8/16/32/64), so masking reduces n
+  //modulo GENSIZE to a left-shift count in [0, GENSIZE), even if n < 0
+  GENTYPE left = n & (GENTYPE)(GENSIZE - 1);
+
+  //Complementary right-shift count, masked so that left == 0 gives 0 rather
+  //than an out-of-range shift by GENSIZE
+  GENTYPE right = ((GENTYPE)GENSIZE - left) & (GENTYPE)(GENSIZE - 1);
+
+  //Keep only the low 'left' bits of the right-shifted value: this drops the
+  //sign-extension of a signed x and is all zeros when left == 0
+  GENTYPE wrap_mask = ~( ~((GENTYPE)0) << left );
+
+  //High part shifted into place, wrapped bits filled in at the bottom
+  return (x << left) | ( (x >> right) & wrap_mask );
+}
\ No newline at end of file