summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp')
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp1125
1 files changed, 0 insertions, 1125 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
deleted file mode 100644
index 8080a40a1f9..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ /dev/null
@@ -1,1125 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file builder_misc.cpp
- *
- * @brief Implementation for miscellaneous builder functions
- *
- * Notes:
- *
- ******************************************************************************/
-#include "jit_pch.hpp"
-#include "builder.h"
-#include "common/rdtsc_buckets.h"
-
-#include <cstdarg>
-
-extern "C" void CallPrint(const char* fmt, ...);
-
-namespace SwrJit
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Convert an IEEE 754 32-bit single precision float to an
- /// 16 bit float with 5 exponent bits and a variable
- /// number of mantissa bits.
- /// @param val - 32-bit float
- /// @todo Maybe move this outside of this file into a header?
- static uint16_t ConvertFloat32ToFloat16(float val)
- {
- uint32_t sign, exp, mant;
- uint32_t roundBits;
-
- // Extract the sign, exponent, and mantissa
- uint32_t uf = *(uint32_t*)&val;
- sign = (uf & 0x80000000) >> 31;
- exp = (uf & 0x7F800000) >> 23;
- mant = uf & 0x007FFFFF;
-
- // Check for out of range
- if (std::isnan(val))
- {
- exp = 0x1F;
- mant = 0x200;
- sign = 1; // set the sign bit for NANs
- }
- else if (std::isinf(val))
- {
- exp = 0x1f;
- mant = 0x0;
- }
- else if (exp > (0x70 + 0x1E)) // Too big to represent -> max representable value
- {
- exp = 0x1E;
- mant = 0x3FF;
- }
- else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
- {
- mant |= 0x00800000;
- for (; exp <= 0x70; mant >>= 1, exp++)
- ;
- exp = 0;
- mant = mant >> 13;
- }
- else if (exp < 0x66) // Too small to represent -> Zero
- {
- exp = 0;
- mant = 0;
- }
- else
- {
- // Saves bits that will be shifted off for rounding
- roundBits = mant & 0x1FFFu;
- // convert exponent and mantissa to 16 bit format
- exp = exp - 0x70;
- mant = mant >> 13;
-
- // Essentially RTZ, but round up if off by only 1 lsb
- if (roundBits == 0x1FFFu)
- {
- mant++;
- // check for overflow
- if ((mant & 0xC00u) != 0)
- exp++;
- // make sure only the needed bits are used
- mant &= 0x3FF;
- }
- }
-
- uint32_t tmpVal = (sign << 15) | (exp << 10) | mant;
- return (uint16_t)tmpVal;
- }
-
- Constant* Builder::C(bool i) { return ConstantInt::get(IRB()->getInt1Ty(), (i ? 1 : 0)); }
-
- Constant* Builder::C(char i) { return ConstantInt::get(IRB()->getInt8Ty(), i); }
-
- Constant* Builder::C(uint8_t i) { return ConstantInt::get(IRB()->getInt8Ty(), i); }
-
- Constant* Builder::C(int i) { return ConstantInt::get(IRB()->getInt32Ty(), i); }
-
- Constant* Builder::C(int64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); }
-
- Constant* Builder::C(uint16_t i) { return ConstantInt::get(mInt16Ty, i); }
-
- Constant* Builder::C(uint32_t i) { return ConstantInt::get(IRB()->getInt32Ty(), i); }
-
- Constant* Builder::C(uint64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); }
-
- Constant* Builder::C(float i) { return ConstantFP::get(IRB()->getFloatTy(), i); }
-
- Constant* Builder::PRED(bool pred)
- {
- return ConstantInt::get(IRB()->getInt1Ty(), (pred ? 1 : 0));
- }
-
- Value* Builder::VIMMED1(uint64_t i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1_16(uint64_t i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1(int i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1_16(int i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1(uint32_t i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1_16(uint32_t i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1(float i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth, cast<ConstantFP>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantFP>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantFP>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1_16(float i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth16, cast<ConstantFP>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantFP>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantFP>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1(bool i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1_16(bool i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VUNDEF_IPTR() { return UndefValue::get(getVectorType(mInt32PtrTy, mVWidth)); }
-
- Value* Builder::VUNDEF(Type* t) { return UndefValue::get(getVectorType(t, mVWidth)); }
-
- Value* Builder::VUNDEF_I() { return UndefValue::get(getVectorType(mInt32Ty, mVWidth)); }
-
- Value* Builder::VUNDEF_I_16() { return UndefValue::get(getVectorType(mInt32Ty, mVWidth16)); }
-
- Value* Builder::VUNDEF_F() { return UndefValue::get(getVectorType(mFP32Ty, mVWidth)); }
-
- Value* Builder::VUNDEF_F_16() { return UndefValue::get(getVectorType(mFP32Ty, mVWidth16)); }
-
- Value* Builder::VUNDEF(Type* ty, uint32_t size)
- {
- return UndefValue::get(getVectorType(ty, size));
- }
-
- Value* Builder::VBROADCAST(Value* src, const llvm::Twine& name)
- {
- // check if src is already a vector
- if (src->getType()->isVectorTy())
- {
- return src;
- }
-
- return VECTOR_SPLAT(mVWidth, src, name);
- }
-
- Value* Builder::VBROADCAST_16(Value* src)
- {
- // check if src is already a vector
- if (src->getType()->isVectorTy())
- {
- return src;
- }
-
- return VECTOR_SPLAT(mVWidth16, src);
- }
-
- uint32_t Builder::IMMED(Value* v)
- {
- SWR_ASSERT(isa<ConstantInt>(v));
- ConstantInt* pValConst = cast<ConstantInt>(v);
- return pValConst->getZExtValue();
- }
-
- int32_t Builder::S_IMMED(Value* v)
- {
- SWR_ASSERT(isa<ConstantInt>(v));
- ConstantInt* pValConst = cast<ConstantInt>(v);
- return pValConst->getSExtValue();
- }
-
- CallInst* Builder::CALL(Value* Callee,
- const std::initializer_list<Value*>& argsList,
- const llvm::Twine& name)
- {
- std::vector<Value*> args;
- for (auto arg : argsList)
- args.push_back(arg);
-#if LLVM_VERSION_MAJOR >= 11
- // see comment to CALLA(Callee) function in the header
- return CALLA(FunctionCallee(cast<Function>(Callee)), args, name);
-#else
- return CALLA(Callee, args, name);
-#endif
- }
-
- CallInst* Builder::CALL(Value* Callee, Value* arg)
- {
- std::vector<Value*> args;
- args.push_back(arg);
-#if LLVM_VERSION_MAJOR >= 11
- // see comment to CALLA(Callee) function in the header
- return CALLA(FunctionCallee(cast<Function>(Callee)), args);
-#else
- return CALLA(Callee, args);
-#endif
- }
-
- CallInst* Builder::CALL2(Value* Callee, Value* arg1, Value* arg2)
- {
- std::vector<Value*> args;
- args.push_back(arg1);
- args.push_back(arg2);
-#if LLVM_VERSION_MAJOR >= 11
- // see comment to CALLA(Callee) function in the header
- return CALLA(FunctionCallee(cast<Function>(Callee)), args);
-#else
- return CALLA(Callee, args);
-#endif
- }
-
- CallInst* Builder::CALL3(Value* Callee, Value* arg1, Value* arg2, Value* arg3)
- {
- std::vector<Value*> args;
- args.push_back(arg1);
- args.push_back(arg2);
- args.push_back(arg3);
-#if LLVM_VERSION_MAJOR >= 11
- // see comment to CALLA(Callee) function in the header
- return CALLA(FunctionCallee(cast<Function>(Callee)), args);
-#else
- return CALLA(Callee, args);
-#endif
- }
-
- Value* Builder::VRCP(Value* va, const llvm::Twine& name)
- {
- return FDIV(VIMMED1(1.0f), va, name); // 1 / a
- }
-
- Value* Builder::VPLANEPS(Value* vA, Value* vB, Value* vC, Value*& vX, Value*& vY)
- {
- Value* vOut = FMADDPS(vA, vX, vC);
- vOut = FMADDPS(vB, vY, vOut);
- return vOut;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief insert a JIT call to CallPrint
- /// - outputs formatted string to both stdout and VS output window
- /// - DEBUG builds only
- /// Usage example:
- /// PRINT("index %d = 0x%p\n",{C(lane), pIndex});
- /// where C(lane) creates a constant value to print, and pIndex is the Value*
- /// result from a GEP, printing out the pointer to memory
- /// @param printStr - constant string to print, which includes format specifiers
- /// @param printArgs - initializer list of Value*'s to print to std out
- CallInst* Builder::PRINT(const std::string& printStr,
- const std::initializer_list<Value*>& printArgs)
- {
- // push the arguments to CallPrint into a vector
- std::vector<Value*> printCallArgs;
- // save room for the format string. we still need to modify it for vectors
- printCallArgs.resize(1);
-
- // search through the format string for special processing
- size_t pos = 0;
- std::string tempStr(printStr);
- pos = tempStr.find('%', pos);
- auto v = printArgs.begin();
-
- while ((pos != std::string::npos) && (v != printArgs.end()))
- {
- Value* pArg = *v;
- Type* pType = pArg->getType();
-
- if (pType->isVectorTy())
- {
- Type* pContainedType = pType->getContainedType(0);
-#if LLVM_VERSION_MAJOR >= 12
- FixedVectorType* pVectorType = cast<FixedVectorType>(pType);
-#elif LLVM_VERSION_MAJOR >= 11
- VectorType* pVectorType = cast<VectorType>(pType);
-#endif
- if (toupper(tempStr[pos + 1]) == 'X')
- {
- tempStr[pos] = '0';
- tempStr[pos + 1] = 'x';
- tempStr.insert(pos + 2, "%08X ");
- pos += 7;
-
- printCallArgs.push_back(VEXTRACT(pArg, C(0)));
-
- std::string vectorFormatStr;
-#if LLVM_VERSION_MAJOR >= 11
- for (uint32_t i = 1; i < pVectorType->getNumElements(); ++i)
-#else
- for (uint32_t i = 1; i < pType->getVectorNumElements(); ++i)
-#endif
- {
- vectorFormatStr += "0x%08X ";
- printCallArgs.push_back(VEXTRACT(pArg, C(i)));
- }
-
- tempStr.insert(pos, vectorFormatStr);
- pos += vectorFormatStr.size();
- }
- else if ((tempStr[pos + 1] == 'f') && (pContainedType->isFloatTy()))
- {
- uint32_t i = 0;
-#if LLVM_VERSION_MAJOR >= 11
- for (; i < pVectorType->getNumElements() - 1; i++)
-#else
- for (; i < pType->getVectorNumElements() - 1; i++)
-#endif
- {
- tempStr.insert(pos, std::string("%f "));
- pos += 3;
- printCallArgs.push_back(
- FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
- }
- printCallArgs.push_back(
- FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
- }
- else if ((tempStr[pos + 1] == 'd') && (pContainedType->isIntegerTy()))
- {
- uint32_t i = 0;
-#if LLVM_VERSION_MAJOR >= 11
- for (; i < pVectorType->getNumElements() - 1; i++)
-#else
- for (; i < pType->getVectorNumElements() - 1; i++)
-#endif
- {
- tempStr.insert(pos, std::string("%d "));
- pos += 3;
- printCallArgs.push_back(
- S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
- }
- printCallArgs.push_back(
- S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
- }
- else if ((tempStr[pos + 1] == 'u') && (pContainedType->isIntegerTy()))
- {
- uint32_t i = 0;
-#if LLVM_VERSION_MAJOR >= 11
- for (; i < pVectorType->getNumElements() - 1; i++)
-#else
- for (; i < pType->getVectorNumElements() - 1; i++)
-#endif
- {
- tempStr.insert(pos, std::string("%d "));
- pos += 3;
- printCallArgs.push_back(
- Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
- }
- printCallArgs.push_back(
- Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
- }
- }
- else
- {
- if (toupper(tempStr[pos + 1]) == 'X')
- {
- tempStr[pos] = '0';
- tempStr.insert(pos + 1, "x%08");
- printCallArgs.push_back(pArg);
- pos += 3;
- }
- // for %f we need to cast float Values to doubles so that they print out correctly
- else if ((tempStr[pos + 1] == 'f') && (pType->isFloatTy()))
- {
- printCallArgs.push_back(FP_EXT(pArg, Type::getDoubleTy(JM()->mContext)));
- pos++;
- }
- else
- {
- printCallArgs.push_back(pArg);
- }
- }
-
- // advance to the next argument
- v++;
- pos = tempStr.find('%', ++pos);
- }
-
- // create global variable constant string
- Constant* constString = ConstantDataArray::getString(JM()->mContext, tempStr, true);
- GlobalVariable* gvPtr = new GlobalVariable(
- constString->getType(), true, GlobalValue::InternalLinkage, constString, "printStr");
- JM()->mpCurrentModule->getGlobalList().push_back(gvPtr);
-
- // get a pointer to the first character in the constant string array
- std::vector<Constant*> geplist{C(0), C(0)};
- Constant* strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr, geplist, false);
-
- // insert the pointer to the format string in the argument vector
- printCallArgs[0] = strGEP;
-
- // get pointer to CallPrint function and insert decl into the module if needed
- std::vector<Type*> args;
- args.push_back(PointerType::get(mInt8Ty, 0));
- FunctionType* callPrintTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, true);
- Function* callPrintFn =
-#if LLVM_VERSION_MAJOR >= 9
- cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy).getCallee());
-#else
- cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy));
-#endif
-
- // if we haven't yet added the symbol to the symbol table
- if ((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr)
- {
- sys::DynamicLibrary::AddSymbol("CallPrint", (void*)&CallPrint);
- }
-
- // insert a call to CallPrint
- return CALLA(callPrintFn, printCallArgs);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Wrapper around PRINT with initializer list.
- CallInst* Builder::PRINT(const std::string& printStr) { return PRINT(printStr, {}); }
-
- Value* Builder::EXTRACT_16(Value* x, uint32_t imm)
- {
- if (imm == 0)
- {
- return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7});
- }
- else
- {
- return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15});
- }
- }
-
- Value* Builder::JOIN_16(Value* a, Value* b)
- {
- return VSHUFFLE(a, b, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief convert x86 <N x float> mask to llvm <N x i1> mask
- Value* Builder::MASK(Value* vmask)
- {
- Value* src = BITCAST(vmask, mSimdInt32Ty);
- return ICMP_SLT(src, VIMMED1(0));
- }
-
- Value* Builder::MASK_16(Value* vmask)
- {
- Value* src = BITCAST(vmask, mSimd16Int32Ty);
- return ICMP_SLT(src, VIMMED1_16(0));
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief convert llvm <N x i1> mask to x86 <N x i32> mask
- Value* Builder::VMASK(Value* mask) { return S_EXT(mask, mSimdInt32Ty); }
-
- Value* Builder::VMASK_16(Value* mask) { return S_EXT(mask, mSimd16Int32Ty); }
-
- /// @brief Convert <Nxi1> llvm mask to integer
- Value* Builder::VMOVMSK(Value* mask)
- {
-#if LLVM_VERSION_MAJOR >= 11
-#if LLVM_VERSION_MAJOR >= 12
- FixedVectorType* pVectorType = cast<FixedVectorType>(mask->getType());
-#else
- VectorType* pVectorType = cast<VectorType>(mask->getType());
-#endif
- SWR_ASSERT(pVectorType->getElementType() == mInt1Ty);
- uint32_t numLanes = pVectorType->getNumElements();
-#else
- SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty);
- uint32_t numLanes = mask->getType()->getVectorNumElements();
-#endif
- Value* i32Result;
- if (numLanes == 8)
- {
- i32Result = BITCAST(mask, mInt8Ty);
- }
- else if (numLanes == 16)
- {
- i32Result = BITCAST(mask, mInt16Ty);
- }
- else
- {
- SWR_ASSERT("Unsupported vector width");
- i32Result = BITCAST(mask, mInt8Ty);
- }
- return Z_EXT(i32Result, mInt32Ty);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VPSHUFB operation in LLVM IR. If not
- /// supported on the underlying platform, emulate it
- /// @param a - 256bit SIMD(32x8bit) of 8bit integer values
- /// @param b - 256bit SIMD(32x8bit) of 8bit integer mask values
- /// Byte masks in lower 128 lane of b selects 8 bit values from lower
- /// 128bits of a, and vice versa for the upper lanes. If the mask
- /// value is negative, '0' is inserted.
- Value* Builder::PSHUFB(Value* a, Value* b)
- {
- Value* res;
- // use avx2 pshufb instruction if available
- if (JM()->mArch.AVX2())
- {
- res = VPSHUFB(a, b);
- }
- else
- {
- Constant* cB = dyn_cast<Constant>(b);
- assert(cB != nullptr);
- // number of 8 bit elements in b
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElms = cast<FixedVectorType>(cB->getType())->getNumElements();
-#else
- uint32_t numElms = cast<VectorType>(cB->getType())->getNumElements();
-#endif
- // output vector
- Value* vShuf = UndefValue::get(getVectorType(mInt8Ty, numElms));
-
- // insert an 8 bit value from the high and low lanes of a per loop iteration
- numElms /= 2;
- for (uint32_t i = 0; i < numElms; i++)
- {
- ConstantInt* cLow128b = cast<ConstantInt>(cB->getAggregateElement(i));
- ConstantInt* cHigh128b = cast<ConstantInt>(cB->getAggregateElement(i + numElms));
-
- // extract values from constant mask
- char valLow128bLane = (char)(cLow128b->getSExtValue());
- char valHigh128bLane = (char)(cHigh128b->getSExtValue());
-
- Value* insertValLow128b;
- Value* insertValHigh128b;
-
- // if the mask value is negative, insert a '0' in the respective output position
- // otherwise, lookup the value at mask position (bits 3..0 of the respective mask
- // byte) in a and insert in output vector
- insertValLow128b =
- (valLow128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valLow128bLane & 0xF)));
- insertValHigh128b = (valHigh128bLane < 0)
- ? C((char)0)
- : VEXTRACT(a, C((valHigh128bLane & 0xF) + numElms));
-
- vShuf = VINSERT(vShuf, insertValLow128b, i);
- vShuf = VINSERT(vShuf, insertValHigh128b, (i + numElms));
- }
- res = vShuf;
- }
- return res;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VPSHUFB operation (sign extend 8 8bit values to 32
- /// bits)in LLVM IR. If not supported on the underlying platform, emulate it
- /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values. Only
- /// lower 8 values are used.
- Value* Builder::PMOVSXBD(Value* a)
- {
- // VPMOVSXBD output type
- Type* v8x32Ty = getVectorType(mInt32Ty, 8);
- // Extract 8 values from 128bit lane and sign extend
- return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VPSHUFB operation (sign extend 8 16bit values to 32
- /// bits)in LLVM IR. If not supported on the underlying platform, emulate it
- /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values.
- Value* Builder::PMOVSXWD(Value* a)
- {
- // VPMOVSXWD output type
- Type* v8x32Ty = getVectorType(mInt32Ty, 8);
- // Extract 8 values from 128bit lane and sign extend
- return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VCVTPH2PS operation (float16->float32 conversion)
- /// in LLVM IR. If not supported on the underlying platform, emulate it
- /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
- Value* Builder::CVTPH2PS(Value* a, const llvm::Twine& name)
- {
- // Bitcast Nxint16 to Nxhalf
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElems = cast<FixedVectorType>(a->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t numElems = cast<VectorType>(a->getType())->getNumElements();
-#else
- uint32_t numElems = a->getType()->getVectorNumElements();
-#endif
- Value* input = BITCAST(a, getVectorType(mFP16Ty, numElems));
-
- return FP_EXT(input, getVectorType(mFP32Ty, numElems), name);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VCVTPS2PH operation (float32->float16 conversion)
- /// in LLVM IR. If not supported on the underlying platform, emulate it
- /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
- Value* Builder::CVTPS2PH(Value* a, Value* rounding)
- {
- if (JM()->mArch.F16C())
- {
- return VCVTPS2PH(a, rounding);
- }
- else
- {
- // call scalar C function for now
- FunctionType* pFuncTy = FunctionType::get(mInt16Ty, mFP32Ty);
- Function* pCvtPs2Ph = cast<Function>(
-#if LLVM_VERSION_MAJOR >= 9
- JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy).getCallee());
-#else
- JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy));
-#endif
-
- if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat32ToFloat16") == nullptr)
- {
- sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16",
- (void*)&ConvertFloat32ToFloat16);
- }
-
- Value* pResult = UndefValue::get(mSimdInt16Ty);
- for (uint32_t i = 0; i < mVWidth; ++i)
- {
- Value* pSrc = VEXTRACT(a, C(i));
- Value* pConv = CALL(pCvtPs2Ph, std::initializer_list<Value*>{pSrc});
- pResult = VINSERT(pResult, pConv, C(i));
- }
-
- return pResult;
- }
- }
-
- Value* Builder::PMAXSD(Value* a, Value* b)
- {
- Value* cmp = ICMP_SGT(a, b);
- return SELECT(cmp, a, b);
- }
-
- Value* Builder::PMINSD(Value* a, Value* b)
- {
- Value* cmp = ICMP_SLT(a, b);
- return SELECT(cmp, a, b);
- }
-
- Value* Builder::PMAXUD(Value* a, Value* b)
- {
- Value* cmp = ICMP_UGT(a, b);
- return SELECT(cmp, a, b);
- }
-
- Value* Builder::PMINUD(Value* a, Value* b)
- {
- Value* cmp = ICMP_ULT(a, b);
- return SELECT(cmp, a, b);
- }
-
- // Helper function to create alloca in entry block of function
- Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType)
- {
- auto saveIP = IRB()->saveIP();
- IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin());
- Value* pAlloca = ALLOCA(pType);
- if (saveIP.isSet())
- IRB()->restoreIP(saveIP);
- return pAlloca;
- }
-
- Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType, Value* pArraySize)
- {
- auto saveIP = IRB()->saveIP();
- IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin());
- Value* pAlloca = ALLOCA(pType, pArraySize);
- if (saveIP.isSet())
- IRB()->restoreIP(saveIP);
- return pAlloca;
- }
-
- Value* Builder::VABSPS(Value* a)
- {
- Value* asInt = BITCAST(a, mSimdInt32Ty);
- Value* result = BITCAST(AND(asInt, VIMMED1(0x7fffffff)), mSimdFP32Ty);
- return result;
- }
-
- Value* Builder::ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name)
- {
- Value* lowCmp = ICMP_SLT(src, low);
- Value* ret = SELECT(lowCmp, low, src);
-
- Value* highCmp = ICMP_SGT(ret, high);
- ret = SELECT(highCmp, high, ret, name);
-
- return ret;
- }
-
- Value* Builder::FCLAMP(Value* src, Value* low, Value* high)
- {
- Value* lowCmp = FCMP_OLT(src, low);
- Value* ret = SELECT(lowCmp, low, src);
-
- Value* highCmp = FCMP_OGT(ret, high);
- ret = SELECT(highCmp, high, ret);
-
- return ret;
- }
-
- Value* Builder::FCLAMP(Value* src, float low, float high)
- {
- Value* result = VMAXPS(src, VIMMED1(low));
- result = VMINPS(result, VIMMED1(high));
-
- return result;
- }
-
- Value* Builder::FMADDPS(Value* a, Value* b, Value* c)
- {
- Value* vOut;
- // This maps to LLVM fmuladd intrinsic
- vOut = VFMADDPS(a, b, c);
- return vOut;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief pop count on vector mask (e.g. <8 x i1>)
- Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Float / Fixed-point conversions
- //////////////////////////////////////////////////////////////////////////
- Value* Builder::VCVT_F32_FIXED_SI(Value* vFloat,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name)
- {
- SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
- Value* fixed = nullptr;
-
-#if 0 // This doesn't work for negative numbers!!
- {
- fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
- C(_MM_FROUND_TO_NEAREST_INT)),
- mSimdInt32Ty);
- }
- else
-#endif
- {
- // Do round to nearest int on fractional bits first
- // Not entirely perfect for negative numbers, but close enough
- vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
- C(_MM_FROUND_TO_NEAREST_INT));
- vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits)));
-
- // TODO: Handle INF, NAN, overflow / underflow, etc.
-
- Value* vSgn = FCMP_OLT(vFloat, VIMMED1(0.0f));
- Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
- Value* vFixed = AND(vFloatInt, VIMMED1((1 << 23) - 1));
- vFixed = OR(vFixed, VIMMED1(1 << 23));
- vFixed = SELECT(vSgn, NEG(vFixed), vFixed);
-
- Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
- vExp = SUB(vExp, VIMMED1(127));
-
- Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
-
- fixed = ASHR(vFixed, vExtraBits, name);
- }
-
- return fixed;
- }
-
- Value* Builder::VCVT_FIXED_SI_F32(Value* vFixed,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name)
- {
- SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
- uint32_t extraBits = 32 - numIntBits - numFracBits;
- if (numIntBits && extraBits)
- {
- // Sign extend
- Value* shftAmt = VIMMED1(extraBits);
- vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt);
- }
-
- Value* fVal = VIMMED1(0.0f);
- Value* fFrac = VIMMED1(0.0f);
- if (numIntBits)
- {
- fVal = SI_TO_FP(ASHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
- }
-
- if (numFracBits)
- {
- fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty);
- fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name);
- }
-
- return FADD(fVal, fFrac, name);
- }
-
- Value* Builder::VCVT_F32_FIXED_UI(Value* vFloat,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name)
- {
- SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
- Value* fixed = nullptr;
-#if 1 // KNOB_SIM_FAST_MATH? Below works correctly from a precision
- // standpoint...
- {
- fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
- C(_MM_FROUND_TO_NEAREST_INT)),
- mSimdInt32Ty);
- }
-#else
- {
- // Do round to nearest int on fractional bits first
- vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
- C(_MM_FROUND_TO_NEAREST_INT));
- vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits)));
-
- // TODO: Handle INF, NAN, overflow / underflow, etc.
-
- Value* vSgn = FCMP_OLT(vFloat, VIMMED1(0.0f));
- Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
- Value* vFixed = AND(vFloatInt, VIMMED1((1 << 23) - 1));
- vFixed = OR(vFixed, VIMMED1(1 << 23));
-
- Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
- vExp = SUB(vExp, VIMMED1(127));
-
- Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
-
- fixed = LSHR(vFixed, vExtraBits, name);
- }
-#endif
- return fixed;
- }
-
- Value* Builder::VCVT_FIXED_UI_F32(Value* vFixed,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name)
- {
- SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
- uint32_t extraBits = 32 - numIntBits - numFracBits;
- if (numIntBits && extraBits)
- {
- // Sign extend
- Value* shftAmt = VIMMED1(extraBits);
- vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt);
- }
-
- Value* fVal = VIMMED1(0.0f);
- Value* fFrac = VIMMED1(0.0f);
- if (numIntBits)
- {
- fVal = UI_TO_FP(LSHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
- }
-
- if (numFracBits)
- {
- fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty);
- fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name);
- }
-
- return FADD(fVal, fFrac, name);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief C functions called by LLVM IR
- //////////////////////////////////////////////////////////////////////////
-
- Value* Builder::VEXTRACTI128(Value* a, Constant* imm8)
- {
- bool flag = !imm8->isZeroValue();
- SmallVector<Constant*, 8> idx;
- for (unsigned i = 0; i < mVWidth / 2; i++)
- {
- idx.push_back(C(flag ? i + mVWidth / 2 : i));
- }
- return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx));
- }
-
- Value* Builder::VINSERTI128(Value* a, Value* b, Constant* imm8)
- {
- bool flag = !imm8->isZeroValue();
- SmallVector<Constant*, 8> idx;
- for (unsigned i = 0; i < mVWidth; i++)
- {
- idx.push_back(C(i));
- }
- Value* inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx));
-
- SmallVector<Constant*, 8> idx2;
- for (unsigned i = 0; i < mVWidth / 2; i++)
- {
- idx2.push_back(C(flag ? i : i + mVWidth));
- }
- for (unsigned i = mVWidth / 2; i < mVWidth; i++)
- {
- idx2.push_back(C(flag ? i + mVWidth / 2 : i));
- }
- return VSHUFFLE(a, inter, ConstantVector::get(idx2));
- }
-
- // rdtsc buckets macros
- void Builder::RDTSC_START(Value* pBucketMgr, Value* pId)
- {
- // @todo due to an issue with thread local storage propagation in llvm, we can only safely
- // call into buckets framework when single threaded
- if (KNOB_SINGLE_THREADED)
- {
- std::vector<Type*> args{
- PointerType::get(mInt32Ty, 0), // pBucketMgr
- mInt32Ty // id
- };
-
- FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
- Function* pFunc = cast<Function>(
-#if LLVM_VERSION_MAJOR >= 9
- JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy).getCallee());
-#else
- JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy));
-#endif
- if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") ==
- nullptr)
- {
- sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket",
- (void*)&BucketManager_StartBucket);
- }
-
- CALL(pFunc, {pBucketMgr, pId});
- }
- }
-
- void Builder::RDTSC_STOP(Value* pBucketMgr, Value* pId)
- {
- // @todo due to an issue with thread local storage propagation in llvm, we can only safely
- // call into buckets framework when single threaded
- if (KNOB_SINGLE_THREADED)
- {
- std::vector<Type*> args{
- PointerType::get(mInt32Ty, 0), // pBucketMgr
- mInt32Ty // id
- };
-
- FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
- Function* pFunc = cast<Function>(
-#if LLVM_VERSION_MAJOR >= 9
- JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy).getCallee());
-#else
- JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy));
-#endif
- if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") ==
- nullptr)
- {
- sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket",
- (void*)&BucketManager_StopBucket);
- }
-
- CALL(pFunc, {pBucketMgr, pId});
- }
- }
-
- uint32_t Builder::GetTypeSize(Type* pType)
- {
- if (pType->isStructTy())
- {
- uint32_t numElems = pType->getStructNumElements();
- Type* pElemTy = pType->getStructElementType(0);
- return numElems * GetTypeSize(pElemTy);
- }
-
- if (pType->isArrayTy())
- {
- uint32_t numElems = pType->getArrayNumElements();
- Type* pElemTy = pType->getArrayElementType();
- return numElems * GetTypeSize(pElemTy);
- }
-
- if (pType->isIntegerTy())
- {
- uint32_t bitSize = pType->getIntegerBitWidth();
- return bitSize / 8;
- }
-
- if (pType->isFloatTy())
- {
- return 4;
- }
-
- if (pType->isHalfTy())
- {
- return 2;
- }
-
- if (pType->isDoubleTy())
- {
- return 8;
- }
-
- SWR_ASSERT(false, "Unimplemented type.");
- return 0;
- }
-} // namespace SwrJit