diff options
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp')
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp | 1125 |
1 files changed, 0 insertions, 1125 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp deleted file mode 100644 index 8080a40a1f9..00000000000 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ /dev/null @@ -1,1125 +0,0 @@ -/**************************************************************************** - * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * @file builder_misc.cpp - * - * @brief Implementation for miscellaneous builder functions - * - * Notes: - * - ******************************************************************************/ -#include "jit_pch.hpp" -#include "builder.h" -#include "common/rdtsc_buckets.h" - -#include <cstdarg> - -extern "C" void CallPrint(const char* fmt, ...); - -namespace SwrJit -{ - ////////////////////////////////////////////////////////////////////////// - /// @brief Convert an IEEE 754 32-bit single precision float to an - /// 16 bit float with 5 exponent bits and a variable - /// number of mantissa bits. - /// @param val - 32-bit float - /// @todo Maybe move this outside of this file into a header? - static uint16_t ConvertFloat32ToFloat16(float val) - { - uint32_t sign, exp, mant; - uint32_t roundBits; - - // Extract the sign, exponent, and mantissa - uint32_t uf = *(uint32_t*)&val; - sign = (uf & 0x80000000) >> 31; - exp = (uf & 0x7F800000) >> 23; - mant = uf & 0x007FFFFF; - - // Check for out of range - if (std::isnan(val)) - { - exp = 0x1F; - mant = 0x200; - sign = 1; // set the sign bit for NANs - } - else if (std::isinf(val)) - { - exp = 0x1f; - mant = 0x0; - } - else if (exp > (0x70 + 0x1E)) // Too big to represent -> max representable value - { - exp = 0x1E; - mant = 0x3FF; - } - else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm - { - mant |= 0x00800000; - for (; exp <= 0x70; mant >>= 1, exp++) - ; - exp = 0; - mant = mant >> 13; - } - else if (exp < 0x66) // Too small to represent -> Zero - { - exp = 0; - mant = 0; - } - else - { - // Saves bits that will be shifted off for rounding - roundBits = mant & 0x1FFFu; - // convert exponent and mantissa to 16 bit format - exp = exp - 0x70; - mant = mant >> 13; - - // Essentially RTZ, but round up if off by only 1 lsb - if (roundBits == 0x1FFFu) - { - mant++; - // check for overflow - if ((mant & 0xC00u) != 0) - exp++; - // make sure only the needed bits are used - mant &= 0x3FF; - } - } - - uint32_t tmpVal = (sign << 15) | (exp << 10) | mant; - return (uint16_t)tmpVal; - } - - Constant* Builder::C(bool i) { return ConstantInt::get(IRB()->getInt1Ty(), (i ? 1 : 0)); } - - Constant* Builder::C(char i) { return ConstantInt::get(IRB()->getInt8Ty(), i); } - - Constant* Builder::C(uint8_t i) { return ConstantInt::get(IRB()->getInt8Ty(), i); } - - Constant* Builder::C(int i) { return ConstantInt::get(IRB()->getInt32Ty(), i); } - - Constant* Builder::C(int64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); } - - Constant* Builder::C(uint16_t i) { return ConstantInt::get(mInt16Ty, i); } - - Constant* Builder::C(uint32_t i) { return ConstantInt::get(IRB()->getInt32Ty(), i); } - - Constant* Builder::C(uint64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); } - - Constant* Builder::C(float i) { return ConstantFP::get(IRB()->getFloatTy(), i); } - - Constant* Builder::PRED(bool pred) - { - return ConstantInt::get(IRB()->getInt1Ty(), (pred ? 1 : 0)); - } - - Value* Builder::VIMMED1(uint64_t i) - { -#if LLVM_VERSION_MAJOR <= 10 - return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i))); -#elif LLVM_VERSION_MAJOR == 11 - return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i))); -#else - return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i))); -#endif - } - - Value* Builder::VIMMED1_16(uint64_t i) - { -#if LLVM_VERSION_MAJOR <= 10 - return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i))); -#elif LLVM_VERSION_MAJOR == 11 - return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i))); -#else - return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i))); -#endif - } - - Value* Builder::VIMMED1(int i) - { -#if LLVM_VERSION_MAJOR <= 10 - return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i))); -#elif LLVM_VERSION_MAJOR == 11 - return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i))); -#else - return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i))); -#endif - } - - Value* Builder::VIMMED1_16(int i) - { -#if LLVM_VERSION_MAJOR <= 10 - return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i))); -#elif LLVM_VERSION_MAJOR == 11 - return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i))); -#else - return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i))); -#endif - } - - Value* Builder::VIMMED1(uint32_t i) - { -#if LLVM_VERSION_MAJOR <= 10 - return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i))); -#elif LLVM_VERSION_MAJOR == 11 - return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i))); -#else - return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i))); -#endif - } - - Value* Builder::VIMMED1_16(uint32_t i) - { -#if LLVM_VERSION_MAJOR <= 10 - return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i))); -#elif LLVM_VERSION_MAJOR == 11 - return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i))); -#else - return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i))); -#endif - } - - Value* Builder::VIMMED1(float i) - { -#if LLVM_VERSION_MAJOR <= 10 - return ConstantVector::getSplat(mVWidth, cast<ConstantFP>(C(i))); -#elif LLVM_VERSION_MAJOR == 11 - return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantFP>(C(i))); -#else - return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantFP>(C(i))); -#endif - } - - Value* Builder::VIMMED1_16(float i) - { -#if LLVM_VERSION_MAJOR <= 10 - return ConstantVector::getSplat(mVWidth16, cast<ConstantFP>(C(i))); -#elif LLVM_VERSION_MAJOR == 11 - return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantFP>(C(i))); -#else - return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantFP>(C(i))); -#endif - } - - Value* Builder::VIMMED1(bool i) - { -#if LLVM_VERSION_MAJOR <= 10 - return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i))); -#elif LLVM_VERSION_MAJOR == 11 - return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i))); -#else - return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i))); -#endif - } - - Value* Builder::VIMMED1_16(bool i) - { -#if LLVM_VERSION_MAJOR <= 10 - return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i))); -#elif LLVM_VERSION_MAJOR == 11 - return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i))); -#else - return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i))); -#endif - } - - Value* Builder::VUNDEF_IPTR() { return UndefValue::get(getVectorType(mInt32PtrTy, mVWidth)); } - - Value* Builder::VUNDEF(Type* t) { return UndefValue::get(getVectorType(t, mVWidth)); } - - Value* Builder::VUNDEF_I() { return UndefValue::get(getVectorType(mInt32Ty, mVWidth)); } - - Value* Builder::VUNDEF_I_16() { return UndefValue::get(getVectorType(mInt32Ty, mVWidth16)); } - - Value* Builder::VUNDEF_F() { return UndefValue::get(getVectorType(mFP32Ty, mVWidth)); } - - Value* Builder::VUNDEF_F_16() { return UndefValue::get(getVectorType(mFP32Ty, mVWidth16)); } - - Value* Builder::VUNDEF(Type* ty, uint32_t size) - { - return UndefValue::get(getVectorType(ty, size)); - } - - Value* Builder::VBROADCAST(Value* src, const llvm::Twine& name) - { - // check if src is already a vector - if (src->getType()->isVectorTy()) - { - return src; - } - - return VECTOR_SPLAT(mVWidth, src, name); - } - - Value* Builder::VBROADCAST_16(Value* src) - { - // check if src is already a vector - if (src->getType()->isVectorTy()) - { - return src; - } - - return VECTOR_SPLAT(mVWidth16, src); - } - - uint32_t Builder::IMMED(Value* v) - { - SWR_ASSERT(isa<ConstantInt>(v)); - ConstantInt* pValConst = cast<ConstantInt>(v); - return pValConst->getZExtValue(); - } - - int32_t Builder::S_IMMED(Value* v) - { - SWR_ASSERT(isa<ConstantInt>(v)); - ConstantInt* pValConst = cast<ConstantInt>(v); - return pValConst->getSExtValue(); - } - - CallInst* Builder::CALL(Value* Callee, - const std::initializer_list<Value*>& argsList, - const llvm::Twine& name) - { - std::vector<Value*> args; - for (auto arg : argsList) - args.push_back(arg); -#if LLVM_VERSION_MAJOR >= 11 - // see comment to CALLA(Callee) function in the header - return CALLA(FunctionCallee(cast<Function>(Callee)), args, name); -#else - return CALLA(Callee, args, name); -#endif - } - - CallInst* Builder::CALL(Value* Callee, Value* arg) - { - std::vector<Value*> args; - args.push_back(arg); -#if LLVM_VERSION_MAJOR >= 11 - // see comment to CALLA(Callee) function in the header - return CALLA(FunctionCallee(cast<Function>(Callee)), args); -#else - return CALLA(Callee, args); -#endif - } - - CallInst* Builder::CALL2(Value* Callee, Value* arg1, Value* arg2) - { - std::vector<Value*> args; - args.push_back(arg1); - args.push_back(arg2); -#if LLVM_VERSION_MAJOR >= 11 - // see comment to CALLA(Callee) function in the header - return CALLA(FunctionCallee(cast<Function>(Callee)), args); -#else - return CALLA(Callee, args); -#endif - } - - CallInst* Builder::CALL3(Value* Callee, Value* arg1, Value* arg2, Value* arg3) - { - std::vector<Value*> args; - args.push_back(arg1); - args.push_back(arg2); - args.push_back(arg3); -#if LLVM_VERSION_MAJOR >= 11 - // see comment to CALLA(Callee) function in the header - return CALLA(FunctionCallee(cast<Function>(Callee)), args); -#else - return CALLA(Callee, args); -#endif - } - - Value* Builder::VRCP(Value* va, const llvm::Twine& name) - { - return FDIV(VIMMED1(1.0f), va, name); // 1 / a - } - - Value* Builder::VPLANEPS(Value* vA, Value* vB, Value* vC, Value*& vX, Value*& vY) - { - Value* vOut = FMADDPS(vA, vX, vC); - vOut = FMADDPS(vB, vY, vOut); - return vOut; - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief insert a JIT call to CallPrint - /// - outputs formatted string to both stdout and VS output window - /// - DEBUG builds only - /// Usage example: - /// PRINT("index %d = 0x%p\n",{C(lane), pIndex}); - /// where C(lane) creates a constant value to print, and pIndex is the Value* - /// result from a GEP, printing out the pointer to memory - /// @param printStr - constant string to print, which includes format specifiers - /// @param printArgs - initializer list of Value*'s to print to std out - CallInst* Builder::PRINT(const std::string& printStr, - const std::initializer_list<Value*>& printArgs) - { - // push the arguments to CallPrint into a vector - std::vector<Value*> printCallArgs; - // save room for the format string. we still need to modify it for vectors - printCallArgs.resize(1); - - // search through the format string for special processing - size_t pos = 0; - std::string tempStr(printStr); - pos = tempStr.find('%', pos); - auto v = printArgs.begin(); - - while ((pos != std::string::npos) && (v != printArgs.end())) - { - Value* pArg = *v; - Type* pType = pArg->getType(); - - if (pType->isVectorTy()) - { - Type* pContainedType = pType->getContainedType(0); -#if LLVM_VERSION_MAJOR >= 12 - FixedVectorType* pVectorType = cast<FixedVectorType>(pType); -#elif LLVM_VERSION_MAJOR >= 11 - VectorType* pVectorType = cast<VectorType>(pType); -#endif - if (toupper(tempStr[pos + 1]) == 'X') - { - tempStr[pos] = '0'; - tempStr[pos + 1] = 'x'; - tempStr.insert(pos + 2, "%08X "); - pos += 7; - - printCallArgs.push_back(VEXTRACT(pArg, C(0))); - - std::string vectorFormatStr; -#if LLVM_VERSION_MAJOR >= 11 - for (uint32_t i = 1; i < pVectorType->getNumElements(); ++i) -#else - for (uint32_t i = 1; i < pType->getVectorNumElements(); ++i) -#endif - { - vectorFormatStr += "0x%08X "; - printCallArgs.push_back(VEXTRACT(pArg, C(i))); - } - - tempStr.insert(pos, vectorFormatStr); - pos += vectorFormatStr.size(); - } - else if ((tempStr[pos + 1] == 'f') && (pContainedType->isFloatTy())) - { - uint32_t i = 0; -#if LLVM_VERSION_MAJOR >= 11 - for (; i < pVectorType->getNumElements() - 1; i++) -#else - for (; i < pType->getVectorNumElements() - 1; i++) -#endif - { - tempStr.insert(pos, std::string("%f ")); - pos += 3; - printCallArgs.push_back( - FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext))); - } - printCallArgs.push_back( - FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext))); - } - else if ((tempStr[pos + 1] == 'd') && (pContainedType->isIntegerTy())) - { - uint32_t i = 0; -#if LLVM_VERSION_MAJOR >= 11 - for (; i < pVectorType->getNumElements() - 1; i++) -#else - for (; i < pType->getVectorNumElements() - 1; i++) -#endif - { - tempStr.insert(pos, std::string("%d ")); - pos += 3; - printCallArgs.push_back( - S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext))); - } - printCallArgs.push_back( - S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext))); - } - else if ((tempStr[pos + 1] == 'u') && (pContainedType->isIntegerTy())) - { - uint32_t i = 0; -#if LLVM_VERSION_MAJOR >= 11 - for (; i < pVectorType->getNumElements() - 1; i++) -#else - for (; i < pType->getVectorNumElements() - 1; i++) -#endif - { - tempStr.insert(pos, std::string("%d ")); - pos += 3; - printCallArgs.push_back( - Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext))); - } - printCallArgs.push_back( - Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext))); - } - } - else - { - if (toupper(tempStr[pos + 1]) == 'X') - { - tempStr[pos] = '0'; - tempStr.insert(pos + 1, "x%08"); - printCallArgs.push_back(pArg); - pos += 3; - } - // for %f we need to cast float Values to doubles so that they print out correctly - else if ((tempStr[pos + 1] == 'f') && (pType->isFloatTy())) - { - printCallArgs.push_back(FP_EXT(pArg, Type::getDoubleTy(JM()->mContext))); - pos++; - } - else - { - printCallArgs.push_back(pArg); - } - } - - // advance to the next argument - v++; - pos = tempStr.find('%', ++pos); - } - - // create global variable constant string - Constant* constString = ConstantDataArray::getString(JM()->mContext, tempStr, true); - GlobalVariable* gvPtr = new GlobalVariable( - constString->getType(), true, GlobalValue::InternalLinkage, constString, "printStr"); - JM()->mpCurrentModule->getGlobalList().push_back(gvPtr); - - // get a pointer to the first character in the constant string array - std::vector<Constant*> geplist{C(0), C(0)}; - Constant* strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr, geplist, false); - - // insert the pointer to the format string in the argument vector - printCallArgs[0] = strGEP; - - // get pointer to CallPrint function and insert decl into the module if needed - std::vector<Type*> args; - args.push_back(PointerType::get(mInt8Ty, 0)); - FunctionType* callPrintTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, true); - Function* callPrintFn = -#if LLVM_VERSION_MAJOR >= 9 - cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy).getCallee()); -#else - cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy)); -#endif - - // if we haven't yet added the symbol to the symbol table - if ((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr) - { - sys::DynamicLibrary::AddSymbol("CallPrint", (void*)&CallPrint); - } - - // insert a call to CallPrint - return CALLA(callPrintFn, printCallArgs); - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief Wrapper around PRINT with initializer list. - CallInst* Builder::PRINT(const std::string& printStr) { return PRINT(printStr, {}); } - - Value* Builder::EXTRACT_16(Value* x, uint32_t imm) - { - if (imm == 0) - { - return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7}); - } - else - { - return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15}); - } - } - - Value* Builder::JOIN_16(Value* a, Value* b) - { - return VSHUFFLE(a, b, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief convert x86 <N x float> mask to llvm <N x i1> mask - Value* Builder::MASK(Value* vmask) - { - Value* src = BITCAST(vmask, mSimdInt32Ty); - return ICMP_SLT(src, VIMMED1(0)); - } - - Value* Builder::MASK_16(Value* vmask) - { - Value* src = BITCAST(vmask, mSimd16Int32Ty); - return ICMP_SLT(src, VIMMED1_16(0)); - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief convert llvm <N x i1> mask to x86 <N x i32> mask - Value* Builder::VMASK(Value* mask) { return S_EXT(mask, mSimdInt32Ty); } - - Value* Builder::VMASK_16(Value* mask) { return S_EXT(mask, mSimd16Int32Ty); } - - /// @brief Convert <Nxi1> llvm mask to integer - Value* Builder::VMOVMSK(Value* mask) - { -#if LLVM_VERSION_MAJOR >= 11 -#if LLVM_VERSION_MAJOR >= 12 - FixedVectorType* pVectorType = cast<FixedVectorType>(mask->getType()); -#else - VectorType* pVectorType = cast<VectorType>(mask->getType()); -#endif - SWR_ASSERT(pVectorType->getElementType() == mInt1Ty); - uint32_t numLanes = pVectorType->getNumElements(); -#else - SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty); - uint32_t numLanes = mask->getType()->getVectorNumElements(); -#endif - Value* i32Result; - if (numLanes == 8) - { - i32Result = BITCAST(mask, mInt8Ty); - } - else if (numLanes == 16) - { - i32Result = BITCAST(mask, mInt16Ty); - } - else - { - SWR_ASSERT("Unsupported vector width"); - i32Result = BITCAST(mask, mInt8Ty); - } - return Z_EXT(i32Result, mInt32Ty); - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief Generate a VPSHUFB operation in LLVM IR. If not - /// supported on the underlying platform, emulate it - /// @param a - 256bit SIMD(32x8bit) of 8bit integer values - /// @param b - 256bit SIMD(32x8bit) of 8bit integer mask values - /// Byte masks in lower 128 lane of b selects 8 bit values from lower - /// 128bits of a, and vice versa for the upper lanes. If the mask - /// value is negative, '0' is inserted. - Value* Builder::PSHUFB(Value* a, Value* b) - { - Value* res; - // use avx2 pshufb instruction if available - if (JM()->mArch.AVX2()) - { - res = VPSHUFB(a, b); - } - else - { - Constant* cB = dyn_cast<Constant>(b); - assert(cB != nullptr); - // number of 8 bit elements in b -#if LLVM_VERSION_MAJOR >= 12 - uint32_t numElms = cast<FixedVectorType>(cB->getType())->getNumElements(); -#else - uint32_t numElms = cast<VectorType>(cB->getType())->getNumElements(); -#endif - // output vector - Value* vShuf = UndefValue::get(getVectorType(mInt8Ty, numElms)); - - // insert an 8 bit value from the high and low lanes of a per loop iteration - numElms /= 2; - for (uint32_t i = 0; i < numElms; i++) - { - ConstantInt* cLow128b = cast<ConstantInt>(cB->getAggregateElement(i)); - ConstantInt* cHigh128b = cast<ConstantInt>(cB->getAggregateElement(i + numElms)); - - // extract values from constant mask - char valLow128bLane = (char)(cLow128b->getSExtValue()); - char valHigh128bLane = (char)(cHigh128b->getSExtValue()); - - Value* insertValLow128b; - Value* insertValHigh128b; - - // if the mask value is negative, insert a '0' in the respective output position - // otherwise, lookup the value at mask position (bits 3..0 of the respective mask - // byte) in a and insert in output vector - insertValLow128b = - (valLow128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valLow128bLane & 0xF))); - insertValHigh128b = (valHigh128bLane < 0) - ? C((char)0) - : VEXTRACT(a, C((valHigh128bLane & 0xF) + numElms)); - - vShuf = VINSERT(vShuf, insertValLow128b, i); - vShuf = VINSERT(vShuf, insertValHigh128b, (i + numElms)); - } - res = vShuf; - } - return res; - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief Generate a VPSHUFB operation (sign extend 8 8bit values to 32 - /// bits)in LLVM IR. If not supported on the underlying platform, emulate it - /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values. Only - /// lower 8 values are used. - Value* Builder::PMOVSXBD(Value* a) - { - // VPMOVSXBD output type - Type* v8x32Ty = getVectorType(mInt32Ty, 8); - // Extract 8 values from 128bit lane and sign extend - return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief Generate a VPSHUFB operation (sign extend 8 16bit values to 32 - /// bits)in LLVM IR. If not supported on the underlying platform, emulate it - /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values. - Value* Builder::PMOVSXWD(Value* a) - { - // VPMOVSXWD output type - Type* v8x32Ty = getVectorType(mInt32Ty, 8); - // Extract 8 values from 128bit lane and sign extend - return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief Generate a VCVTPH2PS operation (float16->float32 conversion) - /// in LLVM IR. If not supported on the underlying platform, emulate it - /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format. - Value* Builder::CVTPH2PS(Value* a, const llvm::Twine& name) - { - // Bitcast Nxint16 to Nxhalf -#if LLVM_VERSION_MAJOR >= 12 - uint32_t numElems = cast<FixedVectorType>(a->getType())->getNumElements(); -#elif LLVM_VERSION_MAJOR >= 11 - uint32_t numElems = cast<VectorType>(a->getType())->getNumElements(); -#else - uint32_t numElems = a->getType()->getVectorNumElements(); -#endif - Value* input = BITCAST(a, getVectorType(mFP16Ty, numElems)); - - return FP_EXT(input, getVectorType(mFP32Ty, numElems), name); - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief Generate a VCVTPS2PH operation (float32->float16 conversion) - /// in LLVM IR. If not supported on the underlying platform, emulate it - /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format. - Value* Builder::CVTPS2PH(Value* a, Value* rounding) - { - if (JM()->mArch.F16C()) - { - return VCVTPS2PH(a, rounding); - } - else - { - // call scalar C function for now - FunctionType* pFuncTy = FunctionType::get(mInt16Ty, mFP32Ty); - Function* pCvtPs2Ph = cast<Function>( -#if LLVM_VERSION_MAJOR >= 9 - JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy).getCallee()); -#else - JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy)); -#endif - - if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat32ToFloat16") == nullptr) - { - sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16", - (void*)&ConvertFloat32ToFloat16); - } - - Value* pResult = UndefValue::get(mSimdInt16Ty); - for (uint32_t i = 0; i < mVWidth; ++i) - { - Value* pSrc = VEXTRACT(a, C(i)); - Value* pConv = CALL(pCvtPs2Ph, std::initializer_list<Value*>{pSrc}); - pResult = VINSERT(pResult, pConv, C(i)); - } - - return pResult; - } - } - - Value* Builder::PMAXSD(Value* a, Value* b) - { - Value* cmp = ICMP_SGT(a, b); - return SELECT(cmp, a, b); - } - - Value* Builder::PMINSD(Value* a, Value* b) - { - Value* cmp = ICMP_SLT(a, b); - return SELECT(cmp, a, b); - } - - Value* Builder::PMAXUD(Value* a, Value* b) - { - Value* cmp = ICMP_UGT(a, b); - return SELECT(cmp, a, b); - } - - Value* Builder::PMINUD(Value* a, Value* b) - { - Value* cmp = ICMP_ULT(a, b); - return SELECT(cmp, a, b); - } - - // Helper function to create alloca in entry block of function - Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType) - { - auto saveIP = IRB()->saveIP(); - IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin()); - Value* pAlloca = ALLOCA(pType); - if (saveIP.isSet()) - IRB()->restoreIP(saveIP); - return pAlloca; - } - - Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType, Value* pArraySize) - { - auto saveIP = IRB()->saveIP(); - IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin()); - Value* pAlloca = ALLOCA(pType, pArraySize); - if (saveIP.isSet()) - IRB()->restoreIP(saveIP); - return pAlloca; - } - - Value* Builder::VABSPS(Value* a) - { - Value* asInt = BITCAST(a, mSimdInt32Ty); - Value* result = BITCAST(AND(asInt, VIMMED1(0x7fffffff)), mSimdFP32Ty); - return result; - } - - Value* Builder::ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name) - { - Value* lowCmp = ICMP_SLT(src, low); - Value* ret = SELECT(lowCmp, low, src); - - Value* highCmp = ICMP_SGT(ret, high); - ret = SELECT(highCmp, high, ret, name); - - return ret; - } - - Value* Builder::FCLAMP(Value* src, Value* low, Value* high) - { - Value* lowCmp = FCMP_OLT(src, low); - Value* ret = SELECT(lowCmp, low, src); - - Value* highCmp = FCMP_OGT(ret, high); - ret = SELECT(highCmp, high, ret); - - return ret; - } - - Value* Builder::FCLAMP(Value* src, float low, float high) - { - Value* result = VMAXPS(src, VIMMED1(low)); - result = VMINPS(result, VIMMED1(high)); - - return result; - } - - Value* Builder::FMADDPS(Value* a, Value* b, Value* c) - { - Value* vOut; - // This maps to LLVM fmuladd intrinsic - vOut = VFMADDPS(a, b, c); - return vOut; - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief pop count on vector mask (e.g. <8 x i1>) - Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); } - - ////////////////////////////////////////////////////////////////////////// - /// @brief Float / Fixed-point conversions - ////////////////////////////////////////////////////////////////////////// - Value* Builder::VCVT_F32_FIXED_SI(Value* vFloat, - uint32_t numIntBits, - uint32_t numFracBits, - const llvm::Twine& name) - { - SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); - Value* fixed = nullptr; - -#if 0 // This doesn't work for negative numbers!! - { - fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), - C(_MM_FROUND_TO_NEAREST_INT)), - mSimdInt32Ty); - } - else -#endif - { - // Do round to nearest int on fractional bits first - // Not entirely perfect for negative numbers, but close enough - vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), - C(_MM_FROUND_TO_NEAREST_INT)); - vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits))); - - // TODO: Handle INF, NAN, overflow / underflow, etc. - - Value* vSgn = FCMP_OLT(vFloat, VIMMED1(0.0f)); - Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty); - Value* vFixed = AND(vFloatInt, VIMMED1((1 << 23) - 1)); - vFixed = OR(vFixed, VIMMED1(1 << 23)); - vFixed = SELECT(vSgn, NEG(vFixed), vFixed); - - Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24)); - vExp = SUB(vExp, VIMMED1(127)); - - Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp); - - fixed = ASHR(vFixed, vExtraBits, name); - } - - return fixed; - } - - Value* Builder::VCVT_FIXED_SI_F32(Value* vFixed, - uint32_t numIntBits, - uint32_t numFracBits, - const llvm::Twine& name) - { - SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); - uint32_t extraBits = 32 - numIntBits - numFracBits; - if (numIntBits && extraBits) - { - // Sign extend - Value* shftAmt = VIMMED1(extraBits); - vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt); - } - - Value* fVal = VIMMED1(0.0f); - Value* fFrac = VIMMED1(0.0f); - if (numIntBits) - { - fVal = SI_TO_FP(ASHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name); - } - - if (numFracBits) - { - fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty); - fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name); - } - - return FADD(fVal, fFrac, name); - } - - Value* Builder::VCVT_F32_FIXED_UI(Value* vFloat, - uint32_t numIntBits, - uint32_t numFracBits, - const llvm::Twine& name) - { - SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); - Value* fixed = nullptr; -#if 1 // KNOB_SIM_FAST_MATH? Below works correctly from a precision - // standpoint... - { - fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), - C(_MM_FROUND_TO_NEAREST_INT)), - mSimdInt32Ty); - } -#else - { - // Do round to nearest int on fractional bits first - vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), - C(_MM_FROUND_TO_NEAREST_INT)); - vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits))); - - // TODO: Handle INF, NAN, overflow / underflow, etc. - - Value* vSgn = FCMP_OLT(vFloat, VIMMED1(0.0f)); - Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty); - Value* vFixed = AND(vFloatInt, VIMMED1((1 << 23) - 1)); - vFixed = OR(vFixed, VIMMED1(1 << 23)); - - Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24)); - vExp = SUB(vExp, VIMMED1(127)); - - Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp); - - fixed = LSHR(vFixed, vExtraBits, name); - } -#endif - return fixed; - } - - Value* Builder::VCVT_FIXED_UI_F32(Value* vFixed, - uint32_t numIntBits, - uint32_t numFracBits, - const llvm::Twine& name) - { - SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); - uint32_t extraBits = 32 - numIntBits - numFracBits; - if (numIntBits && extraBits) - { - // Sign extend - Value* shftAmt = VIMMED1(extraBits); - vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt); - } - - Value* fVal = VIMMED1(0.0f); - Value* fFrac = VIMMED1(0.0f); - if (numIntBits) - { - fVal = UI_TO_FP(LSHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name); - } - - if (numFracBits) - { - fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty); - fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name); - } - - return FADD(fVal, fFrac, name); - } - - ////////////////////////////////////////////////////////////////////////// - /// @brief C functions called by LLVM IR - ////////////////////////////////////////////////////////////////////////// - - Value* Builder::VEXTRACTI128(Value* a, Constant* imm8) - { - bool flag = !imm8->isZeroValue(); - SmallVector<Constant*, 8> idx; - for (unsigned i = 0; i < mVWidth / 2; i++) - { - idx.push_back(C(flag ? i + mVWidth / 2 : i)); - } - return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx)); - } - - Value* Builder::VINSERTI128(Value* a, Value* b, Constant* imm8) - { - bool flag = !imm8->isZeroValue(); - SmallVector<Constant*, 8> idx; - for (unsigned i = 0; i < mVWidth; i++) - { - idx.push_back(C(i)); - } - Value* inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx)); - - SmallVector<Constant*, 8> idx2; - for (unsigned i = 0; i < mVWidth / 2; i++) - { - idx2.push_back(C(flag ? i : i + mVWidth)); - } - for (unsigned i = mVWidth / 2; i < mVWidth; i++) - { - idx2.push_back(C(flag ? i + mVWidth / 2 : i)); - } - return VSHUFFLE(a, inter, ConstantVector::get(idx2)); - } - - // rdtsc buckets macros - void Builder::RDTSC_START(Value* pBucketMgr, Value* pId) - { - // @todo due to an issue with thread local storage propagation in llvm, we can only safely - // call into buckets framework when single threaded - if (KNOB_SINGLE_THREADED) - { - std::vector<Type*> args{ - PointerType::get(mInt32Ty, 0), // pBucketMgr - mInt32Ty // id - }; - - FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false); - Function* pFunc = cast<Function>( -#if LLVM_VERSION_MAJOR >= 9 - JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy).getCallee()); -#else - JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy)); -#endif - if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") == - nullptr) - { - sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket", - (void*)&BucketManager_StartBucket); - } - - CALL(pFunc, {pBucketMgr, pId}); - } - } - - void Builder::RDTSC_STOP(Value* pBucketMgr, Value* pId) - { - // @todo due to an issue with thread local storage propagation in llvm, we can only safely - // call into buckets framework when single threaded - if (KNOB_SINGLE_THREADED) - { - std::vector<Type*> args{ - PointerType::get(mInt32Ty, 0), // pBucketMgr - mInt32Ty // id - }; - - FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false); - Function* pFunc = cast<Function>( -#if LLVM_VERSION_MAJOR >= 9 - JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy).getCallee()); -#else - JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy)); -#endif - if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") == - nullptr) - { - sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket", - (void*)&BucketManager_StopBucket); - } - - CALL(pFunc, {pBucketMgr, pId}); - } - } - - uint32_t Builder::GetTypeSize(Type* pType) - { - if (pType->isStructTy()) - { - uint32_t numElems = pType->getStructNumElements(); - Type* pElemTy = pType->getStructElementType(0); - return numElems * GetTypeSize(pElemTy); - } - - if (pType->isArrayTy()) - { - uint32_t numElems = pType->getArrayNumElements(); - Type* pElemTy = pType->getArrayElementType(); - return numElems * GetTypeSize(pElemTy); - } - - if (pType->isIntegerTy()) - { - uint32_t bitSize = pType->getIntegerBitWidth(); - return bitSize / 8; - } - - if (pType->isFloatTy()) - { - return 4; - } - - if (pType->isHalfTy()) - { - return 2; - } - - if (pType->isDoubleTy()) - { - return 8; - } - - SWR_ASSERT(false, "Unimplemented type."); - return 0; - } -} // namespace SwrJit |