diff options
Diffstat (limited to 'src/amd/addrlib/src/gfx10/gfx10addrlib.cpp')
-rw-r--r-- | src/amd/addrlib/src/gfx10/gfx10addrlib.cpp | 331 |
1 files changed, 163 insertions, 168 deletions
diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp index 14ce04d379c..f79289e1136 100644 --- a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp +++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp @@ -1,28 +1,10 @@ /* - * Copyright © 2007-2019 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ +************************************************************************************************************************ +* +* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
+* SPDX-License-Identifier: MIT +* +***********************************************************************************************************************/ /** ************************************************************************************************************************ @@ -32,6 +14,7 @@ */ #include "gfx10addrlib.h" +#include "addrcommon.h" #include "gfx10_gb_reg.h" #include "amdgpu_asic_addr.h" @@ -66,46 +49,46 @@ namespace V2 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] = {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR - {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S - {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S - {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S - {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T - {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X - {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - - {0, 0, 0, 1, 0, 1, 
0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X - {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X - {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X - {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X - - {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved - {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL + {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR + {{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_S + {{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_D + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + {{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_S + {{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_D + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + {{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_S + {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_D + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_S_T + {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_D_T + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_X + {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_X + {{0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_4KB_R_X + + {{0, 
0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X + {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X + {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_D_X + {{0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_64KB_R_X + + {{0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_VAR_Z_X + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + {{0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_VAR_R_X + {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR_GENERAL }; const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}}; @@ -129,6 +112,7 @@ Gfx10Lib::Gfx10Lib(const Client* pClient) m_numSaLog2(0), m_colorBaseIndex(0), m_xmaskBaseIndex(0), + m_htileBaseIndex(0), m_dccBaseIndex(0) { memset(&m_settings, 0, sizeof(m_settings)); @@ -611,7 +595,6 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord( (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX : (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX); - const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7; const UINT_32 blkMask = (1 << blkSizeLog2) - 1; const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]], @@ -677,10 +660,9 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord( { const UINT_32 numSampleLog2 = Log2(pIn->numSamples); const UINT_32 pipeMask = (1 << m_pipesLog2) - 1; - const UINT_32 index = m_xmaskBaseIndex + numSampleLog2; + const UINT_32 index = m_htileBaseIndex + numSampleLog2; const UINT_8* patIdxTable = m_settings.supportRbPlus ? 
GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX; - const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4; const UINT_32 blkMask = (1 << blkSizeLog2) - 1; const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]], @@ -951,9 +933,11 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams( { // Skip unaligned case - m_xmaskBaseIndex += MaxNumOfAA; + m_xmaskBaseIndex += MaxNumOfBppCMask; + m_htileBaseIndex += MaxNumOfAA; - m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA; + m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfBppCMask; + m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA; m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp; if (m_settings.supportRbPlus) @@ -969,7 +953,8 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams( if (m_numPkrLog2 >= 2) { m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp; - m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA; + m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfBppCMask; + m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA; } } else @@ -979,9 +964,8 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams( 1; ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA); - - ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == - sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0])); + ADDR_C_ASSERT(sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]) == + (numPipeType + 1) * MaxNumOfBppCMask); } } @@ -992,7 +976,6 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams( m_blockVarSizeLog2 = m_pipesLog2 + 14; } - if (valid) { InitEquationTable(); @@ -1039,25 +1022,25 @@ ChipFamily Gfx10Lib::HwlConvertChipFamily( m_settings.isDcn20 = 1; } - if (ASICREV_IS_SIENNA_CICHLID(chipRevision)) + if (ASICREV_IS_NAVI21_M(chipRevision)) { m_settings.supportRbPlus = 1; m_settings.dccUnsup3DSwDis = 0; } - if (ASICREV_IS_NAVY_FLOUNDER(chipRevision)) + if (ASICREV_IS_NAVI22_P(chipRevision)) { m_settings.supportRbPlus = 1; 
m_settings.dccUnsup3DSwDis = 0; } - if (ASICREV_IS_DIMGREY_CAVEFISH(chipRevision)) + if (ASICREV_IS_NAVI23_P(chipRevision)) { m_settings.supportRbPlus = 1; m_settings.dccUnsup3DSwDis = 0; } - if (ASICREV_IS_BEIGE_GOBY(chipRevision)) + if (ASICREV_IS_NAVI24_P(chipRevision)) { m_settings.supportRbPlus = 1; m_settings.dccUnsup3DSwDis = 0; @@ -1074,11 +1057,27 @@ ChipFamily Gfx10Lib::HwlConvertChipFamily( { ADDR_ASSERT(!"Unknown chip revision"); } - break; - - case FAMILY_YC: - if (ASICREV_IS_YELLOW_CARP(chipRevision)) + case FAMILY_RMB: + if (ASICREV_IS_REMBRANDT(chipRevision)) + { + m_settings.supportRbPlus = 1; + m_settings.dccUnsup3DSwDis = 0; + } + else + { + ADDR_ASSERT(!"Unknown chip revision"); + } + break; + case FAMILY_RPL: + if (ASICREV_IS_RAPHAEL(chipRevision)) + { + m_settings.supportRbPlus = 1; + m_settings.dccUnsup3DSwDis = 0; + } + break; + case FAMILY_MDN: + if (ASICREV_IS_MENDOCINO(chipRevision)) { m_settings.supportRbPlus = 1; m_settings.dccUnsup3DSwDis = 0; @@ -1087,9 +1086,7 @@ ChipFamily Gfx10Lib::HwlConvertChipFamily( { ADDR_ASSERT(!"Unknown chip revision"); } - break; - default: ADDR_ASSERT(!"Unknown chip family"); break; @@ -1449,13 +1446,15 @@ VOID Gfx10Lib::ConvertSwizzlePatternToEquation( ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern const { - ADDR_BIT_SETTING fullSwizzlePattern[20]; + // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list + ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT]; GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern); const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern; const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); - + memset(pEquation, 0, sizeof(ADDR_EQUATION)); pEquation->numBits = blockSizeLog2; + pEquation->numBitComponents = pPatInfo->maxItemCount; pEquation->stackedDepthSlices = FALSE; for (UINT_32 i = 0; i < elemLog2; i++) @@ -1983,37 +1982,45 @@ VOID Gfx10Lib::InitEquationTable() { memset(m_equationTable, 0, 
sizeof(m_equationTable)); + // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D) + // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at + // computing 2D resources. for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++) { + // Add offset. Start iterating from ADDR_RSRC_TEX_2D const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D); + // Iterate through the maximum number of swizzlemodes a type can hold for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++) { const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx); + // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp) for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++) { UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; + // May or may not return a ADDR_SW_PATINFO for a completely different swizzle mode, essentially + // overwriting the choice. 
const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1); if (pPatInfo != NULL) { ADDR_ASSERT(IsValidSwMode(swMode)); - - if (pPatInfo->maxItemCount <= 3) + if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex { ADDR_EQUATION equation = {}; + // Passing in pPatInfo to get the addr equation ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation); equationIndex = m_numEquations; ADDR_ASSERT(equationIndex < EquationTableSize); - + // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo m_equationTable[equationIndex] = equation; - + // Increment m_numEquations m_numEquations++; } - else + else // There is no equationIndex { // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4)); @@ -2022,7 +2029,8 @@ VOID Gfx10Lib::InitEquationTable() ADDR_ASSERT(m_settings.supportRbPlus == 1); } } - + // equationIndex, which is used to look up equations in m_equationTable, will be cached for every + // iteration in this nested for-loop m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex; } } @@ -2307,15 +2315,15 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView( { ADDR_E_RETURNCODE returnCode = ADDR_OK; - if (pIn->resourceType != ADDR_RSRC_TEX_2D) + if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE) { - // Only 2D resource can have a NonBC view... + // Only thin swizzle mode can have a NonBC view... returnCode = ADDR_INVALIDPARAMS; } - else if ((pIn->format != ADDR_FMT_ASTC_8x8) && + else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) && ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7))) { - // Only support BC1~BC7 or ASTC_8x8 for now... + // Only support BC1~BC7, ASTC, or ETC2 for now... 
returnCode = ADDR_NOTSUPPORTED; } else @@ -2328,14 +2336,15 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView( infoIn.swizzleMode = pIn->swizzleMode; infoIn.resourceType = pIn->resourceType; infoIn.bpp = bpp; - infoIn.width = PowTwoAlign(pIn->width, bcWidth) / bcWidth; - infoIn.height = PowTwoAlign(pIn->height, bcHeight) / bcHeight; + infoIn.width = RoundUpQuotient(pIn->width, bcWidth); + infoIn.height = RoundUpQuotient(pIn->height, bcHeight); infoIn.numSlices = pIn->numSlices; infoIn.numMipLevels = pIn->numMipLevels; infoIn.numSamples = 1; infoIn.numFrags = 1; ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {}; + ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {}; infoOut.pMipInfo = mipInfo; @@ -2381,8 +2390,8 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView( pOut->pipeBankXor = slicePbXorOut.pipeBankXor; const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE; - const UINT_32 requestMipWidth = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth; - const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight; + const UINT_32 requestMipWidth = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth); + const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight); if (inTail) { @@ -2432,10 +2441,8 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView( pOut->mipId = 1; pOut->numMipLevels = 2; - const UINT_32 upperMipWidth = - PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth; - const UINT_32 upperMipHeight = - PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight; + const UINT_32 upperMipWidth = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth); + const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight); const BOOL_32 needToAvoidInTail = tiled && (requestMipWidth <= 
infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ? @@ -2511,7 +2518,6 @@ BOOL_32 Gfx10Lib::ValidateNonSwModeParams( const BOOL_32 tex1d = IsTex1d(rsrcType); const BOOL_32 stereo = flags.qbStereo; - // Resource type check if (tex1d) { @@ -2640,13 +2646,12 @@ BOOL_32 Gfx10Lib::ValidateSwModeParams( ADDR_ASSERT_ALWAYS(); valid = FALSE; } - } else if (tex3d) { if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) || (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) || - (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0))) + (thin3d && ((swizzleMask & Gfx10Rsrc3dViewAs2dSwModeMask) == 0))) { ADDR_ASSERT_ALWAYS(); valid = FALSE; @@ -2749,7 +2754,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck( * Gfx10Lib::HwlGetPreferredSurfaceSetting * * @brief -* Internal function to get suggested surface information for cliet to use +* Internal function to get suggested surface information for client to use * * @return * ADDR_E_RETURNCODE @@ -2818,7 +2823,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement); } - if (BlockTypeWithinMemoryBudget(padSize[0], + if (Addr2BlockTypeWithinMemoryBudget(padSize[0], padSize[1], ratioLow, ratioHi, @@ -2956,7 +2961,6 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( case ADDR_RSRC_TEX_2D: allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask; - break; case ADDR_RSRC_TEX_3D: @@ -2964,7 +2968,8 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( if (pIn->flags.view3dAs2dArray) { - allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask; + // SW_LINEAR can be used for 3D thin images, including BCn image format. 
+ allowedSwModeSet.value &= Gfx10Rsrc3dViewAs2dSwModeMask; } break; @@ -3052,7 +3057,9 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( if (pIn->flags.needEquation) { - FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3)); + UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP : + ADDR_MAX_LEGACY_EQUATION_COMP; + FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components); } if (allowedSwModeSet.value == Gfx10LinearSwModeMask) @@ -3071,11 +3078,13 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( allowedSwModeSet.swLinear = 0; } + // A bitfield where each bit represents a block type. Each swizzle mode maps to a block. ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); // Determine block size if there are 2 or more block type candidates if (IsPow2(allowedBlockSet.value) == FALSE) { + // Tracks a valid SwizzleMode for each valid block type AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {}; swMode[AddrBlockLinear] = ADDR_SW_LINEAR; @@ -3098,19 +3107,21 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S; } + // Tracks the size of each valid swizzle mode's surface in bytes UINT_64 padSize[AddrBlockMaxTiledType] = {}; const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2); const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 
2 : 1); const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u); - UINT_32 minSizeBlk = AddrBlockMicro; - UINT_64 minSize = 0; + UINT_32 minSizeBlk = AddrBlockMicro; // Tracks the most optimal block to use + UINT_64 minSize = 0; // Tracks the minimum acceptable block type ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; + // Iterate through all block types for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++) { - if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i))) + if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i))) { localIn.swizzleMode = swMode[i]; @@ -3134,7 +3145,8 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( } else { - if (BlockTypeWithinMemoryBudget( + // Checks if the block type is within the memory budget but favors larger blocks + if (Addr2BlockTypeWithinMemoryBudget( minSize, padSize[i], ratioLow, @@ -3183,9 +3195,9 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) { if ((i != minSizeBlk) && - IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i))) + Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i))) { - if (BlockTypeWithinMemoryBudget( + if (Addr2BlockTypeWithinMemoryBudget( minSize, padSize[i], 0, @@ -3522,7 +3534,6 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled( return ret; } - /** ************************************************************************************************************************ * Gfx10Lib::ComputeSurfaceInfoMicroTiled @@ -3676,6 +3687,7 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled( UINT_64 mipSize[MaxMipLevels]; UINT_64 mipSliceSize[MaxMipLevels]; + ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); Dim3d fixedTailMaxDim = tailMaxDim; if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1)) @@ -3892,54 +3904,23 @@ UINT_32 Gfx10Lib::ComputeOffsetFromEquation( 
{ UINT_32 v = 0; - if (pEq->addr[i].valid) + for (UINT_32 c = 0; c < pEq->numBitComponents; c++) { - if (pEq->addr[i].channel == 0) + if (pEq->comps[c][i].valid) { - v ^= (x >> pEq->addr[i].index) & 1; - } - else if (pEq->addr[i].channel == 1) - { - v ^= (y >> pEq->addr[i].index) & 1; - } - else - { - ADDR_ASSERT(pEq->addr[i].channel == 2); - v ^= (z >> pEq->addr[i].index) & 1; - } - } - - if (pEq->xor1[i].valid) - { - if (pEq->xor1[i].channel == 0) - { - v ^= (x >> pEq->xor1[i].index) & 1; - } - else if (pEq->xor1[i].channel == 1) - { - v ^= (y >> pEq->xor1[i].index) & 1; - } - else - { - ADDR_ASSERT(pEq->xor1[i].channel == 2); - v ^= (z >> pEq->xor1[i].index) & 1; - } - } - - if (pEq->xor2[i].valid) - { - if (pEq->xor2[i].channel == 0) - { - v ^= (x >> pEq->xor2[i].index) & 1; - } - else if (pEq->xor2[i].channel == 1) - { - v ^= (y >> pEq->xor2[i].index) & 1; - } - else - { - ADDR_ASSERT(pEq->xor2[i].channel == 2); - v ^= (z >> pEq->xor2[i].index) & 1; + if (pEq->comps[c][i].channel == 0) + { + v ^= (x >> pEq->comps[c][i].index) & 1; + } + else if (pEq->comps[c][i].channel == 1) + { + v ^= (y >> pEq->comps[c][i].index) & 1; + } + else + { + ADDR_ASSERT(pEq->comps[c][i].channel == 2); + v ^= (z >> pEq->comps[c][i].index) & 1; + } } } @@ -4068,6 +4049,8 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( UINT_32 numFrag ///< Number of fragment ) const { + // Now elemLog2 is going to be used to access the correct index inside of the pPatInfo array so we will start from + // the right location const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2; const ADDR_SW_PATINFO* patInfo = NULL; const UINT_32 swizzleMask = 1 << swizzleMode; @@ -4130,8 +4113,15 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( { if (IsRtOptSwizzle(swizzleMode)) { - patInfo = m_settings.supportRbPlus ? 
- GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO; + if (swizzleMode == ADDR_SW_4KB_R_X) + { + patInfo = NULL; + } + else + { + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO; + } } else if (IsZOrderSwizzle(swizzleMode)) { @@ -4225,6 +4215,10 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( patInfo = m_settings.supportRbPlus ? GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO; } + else if (swizzleMode == ADDR_SW_4KB_R_X) + { + patInfo = NULL; + } else { ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X); @@ -4329,7 +4323,6 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( return (patInfo != NULL) ? &patInfo[index] : NULL; } - /** ************************************************************************************************************************ * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled @@ -4349,6 +4342,7 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled( ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; ADDR2_MIP_INFO mipInfo[MaxMipLevels]; + ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); localIn.swizzleMode = pIn->swizzleMode; localIn.flags = pIn->flags; @@ -4415,6 +4409,7 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled( ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; ADDR2_MIP_INFO mipInfo[MaxMipLevels]; + ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); localIn.swizzleMode = pIn->swizzleMode; localIn.flags = pIn->flags; |