summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Rowley <timothy.o.rowley@intel.com>2017-06-09 18:37:27 -0500
committerTim Rowley <timothy.o.rowley@intel.com>2017-06-16 16:20:16 -0500
commita6237e4b7fa4c14766b15fb3c638dce1e4b12ad9 (patch)
treed5c41a7ac7268409fe098918314f56c10b339ae9
parent9b448da60ffb5aa807d9145bbac0fdbd580acea9 (diff)
swr/rast: Fix read-back of viewport array index
Binner/clipper read viewport array index from the vertex header as needed. Move viewport state to BACKEND_STATE. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp4
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/binner.cpp129
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/clip.cpp24
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/clip.h63
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h4
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp49
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.h8
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/state.h4
-rw-r--r--src/gallium/drivers/swr/swr_shader.cpp2
-rw-r--r--src/gallium/drivers/swr/swr_state.cpp12
10 files changed, 182 insertions, 117 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index eacce1cc5e9..ae9ced26f58 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -680,7 +680,7 @@ void SwrSetBlendFunc(
// update guardband multipliers for the viewport
void updateGuardbands(API_STATE *pState)
{
- uint32_t numGbs = pState->backendState.readRenderTargetArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
+ uint32_t numGbs = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
for(uint32_t i = 0; i < numGbs; ++i)
{
@@ -736,7 +736,7 @@ void SwrSetScissorRects(
void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
{
API_STATE *pState = &pDC->pState->state;
- uint32_t numScissors = pState->gsState.emitsViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
+ uint32_t numScissors = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
pState->scissorsTileAligned = true;
for (uint32_t index = 0; index < numScissors; ++index)
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index a73816b8bce..036d8b1e7da 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -434,8 +434,7 @@ void BinTriangles(
uint32_t workerId,
simdvector tri[3],
uint32_t triMask,
- simdscalari primID,
- simdscalari viewportIdx)
+ simdscalari primID)
{
SWR_CONTEXT *pContext = pDC->pContext;
@@ -451,6 +450,21 @@ void BinTriangles(
simdscalar vRecipW1 = _simd_set1_ps(1.0f);
simdscalar vRecipW2 = _simd_set1_ps(1.0f);
+ // Read viewport array index if needed
+ simdscalari viewportIdx = _simd_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simdvector vpiAttrib[3];
+ pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd_max_epi32(_simd_setzero_si(), vpai);
+ simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd_and_si(vClearMask, vpai);
+ }
+
if (feState.vpTransformDisable)
{
// RHW is passed in directly when VP transform is disabled
@@ -478,7 +492,7 @@ void BinTriangles(
tri[2].v[2] = _simd_mul_ps(tri[2].v[2], vRecipW2);
// Viewport transform to screen space coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
}
@@ -661,7 +675,7 @@ void BinTriangles(
// Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@@ -863,8 +877,7 @@ void SIMDAPI BinTriangles_simd16(
uint32_t workerId,
simd16vector tri[3],
uint32_t triMask,
- simd16scalari primID,
- simd16scalari viewportIdx)
+ simd16scalari primID)
{
SWR_CONTEXT *pContext = pDC->pContext;
@@ -880,6 +893,20 @@ void SIMDAPI BinTriangles_simd16(
simd16scalar vRecipW0 = _simd16_set1_ps(1.0f);
simd16scalar vRecipW1 = _simd16_set1_ps(1.0f);
simd16scalar vRecipW2 = _simd16_set1_ps(1.0f);
+
+ simd16scalari viewportIdx = _simd16_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simd16vector vpiAttrib[3];
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai);
+ simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd16_and_si(vClearMask, vpai);
+ }
if (feState.vpTransformDisable)
{
@@ -908,7 +935,7 @@ void SIMDAPI BinTriangles_simd16(
tri[2].v[2] = _simd16_mul_ps(tri[2].v[2], vRecipW2);
// Viewport transform to screen space coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
}
@@ -1101,7 +1128,7 @@ void SIMDAPI BinTriangles_simd16(
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simd16scalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors_simd16<KNOB_SIMD16_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@@ -1524,7 +1551,7 @@ void BinPostSetupPoints(
// Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@@ -1672,8 +1699,7 @@ void BinPoints(
uint32_t workerId,
simdvector prim[3],
uint32_t primMask,
- simdscalari primID,
- simdscalari viewportIdx)
+ simdscalari primID)
{
simdvector& primVerts = prim[0];
@@ -1681,6 +1707,21 @@ void BinPoints(
const SWR_FRONTEND_STATE& feState = state.frontendState;
const SWR_RASTSTATE& rastState = state.rastState;
+ // Read back viewport index if required
+ simdscalari viewportIdx = _simd_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simdvector vpiAttrib[1];
+ pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+
+ // OOB indices => forced to zero.
+ vpai = _simd_max_epi32(_simd_setzero_si(), vpai);
+ simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd_and_si(vClearMask, vpai);
+ }
+
if (!feState.vpTransformDisable)
{
// perspective divide
@@ -1690,7 +1731,7 @@ void BinPoints(
primVerts.z = _simd_mul_ps(primVerts.z, vRecipW0);
// viewport transform to screen coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<1>(&primVerts, state.vpMatrices, viewportIdx);
}
@@ -1898,7 +1939,7 @@ void BinPostSetupPoints_simd16(
// Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simd16scalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors_simd16<KNOB_SIMD16_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@@ -2040,8 +2081,7 @@ void SIMDAPI BinPoints_simd16(
uint32_t workerId,
simd16vector prim[3],
uint32_t primMask,
- simd16scalari primID,
- simd16scalari viewportIdx)
+ simd16scalari primID)
{
simd16vector& primVerts = prim[0];
@@ -2049,6 +2089,21 @@ void SIMDAPI BinPoints_simd16(
const SWR_FRONTEND_STATE& feState = state.frontendState;
const SWR_RASTSTATE& rastState = state.rastState;
+ // Read back viewport index if required
+ simd16scalari viewportIdx = _simd16_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simd16vector vpiAttrib[1];
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai)
+ simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd16_and_si(vClearMask, vpai);
+ }
+
if (!feState.vpTransformDisable)
{
// perspective divide
@@ -2059,7 +2114,7 @@ void SIMDAPI BinPoints_simd16(
primVerts.z = _simd16_mul_ps(primVerts.z, vRecipW0);
// viewport transform to screen coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<1>(&primVerts, state.vpMatrices, viewportIdx);
}
@@ -2165,7 +2220,7 @@ void BinPostSetupLines(
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@@ -2370,7 +2425,7 @@ void BinPostSetupLines_simd16(
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
simd16scalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors_simd16<KNOB_SIMD16_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@@ -2533,8 +2588,7 @@ void BinLines(
uint32_t workerId,
simdvector prim[],
uint32_t primMask,
- simdscalari primID,
- simdscalari viewportIdx)
+ simdscalari primID)
{
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@@ -2542,6 +2596,20 @@ void BinLines(
simdscalar vRecipW[2] = { _simd_set1_ps(1.0f), _simd_set1_ps(1.0f) };
+ simdscalari viewportIdx = _simd_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simdvector vpiAttrib[2];
+ pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd_max_epi32(_simd_setzero_si(), vpai);
+
+ // OOB indices => forced to zero.
+ simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd_and_si(vClearMask, vpai);
+ }
+
if (!feState.vpTransformDisable)
{
// perspective divide
@@ -2558,7 +2626,7 @@ void BinLines(
prim[1].v[2] = _simd_mul_ps(prim[1].v[2], vRecipW[1]);
// viewport transform to screen coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
}
@@ -2594,8 +2662,7 @@ void SIMDAPI BinLines_simd16(
uint32_t workerId,
simd16vector prim[3],
uint32_t primMask,
- simd16scalari primID,
- simd16scalari viewportIdx)
+ simd16scalari primID)
{
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@@ -2603,6 +2670,20 @@ void SIMDAPI BinLines_simd16(
simd16scalar vRecipW[2] = { _simd16_set1_ps(1.0f), _simd16_set1_ps(1.0f) };
+ simd16scalari viewportIdx = _simd16_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simd16vector vpiAttrib[2];
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai);
+ simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd16_and_si(vClearMask, vpai);
+ }
+
if (!feState.vpTransformDisable)
{
// perspective divide
@@ -2619,7 +2700,7 @@ void SIMDAPI BinLines_simd16(
prim[1].v[2] = _simd16_mul_ps(prim[1].v[2], vRecipW[1]);
// viewport transform to screen coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
index c93e0fb534a..bd62b58f32a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
@@ -160,35 +160,35 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
return i;
}
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipTriangles, pDC->drawId);
Clipper<3> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipTriangles, 1);
}
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipLines, pDC->drawId);
Clipper<2> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipLines, 1);
}
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipPoints, pDC->drawId);
Clipper<1> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipPoints, 1);
}
#if USE_SIMD16_FRONTEND
-void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
+void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipTriangles, pDC->drawId);
@@ -198,12 +198,12 @@ void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t work
Clipper<VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipTriangles, 1);
}
-void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
+void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipLines, pDC->drawId);
@@ -213,12 +213,12 @@ void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId
Clipper<VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipLines, 1);
}
-void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
+void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipPoints, pDC->drawId);
@@ -228,7 +228,7 @@ void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI
Clipper<VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipPoints, 1);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 92356189673..12b52c5847e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -459,7 +459,7 @@ public:
#endif
// clip SIMD primitives
- void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId, const simdscalari& vViewportIdx)
+ void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId)
{
// input/output vertex store for clipper
simdvertex vertices[7]; // maximum 7 verts generated per triangle
@@ -559,7 +559,6 @@ public:
uint32_t* pVertexCount = (uint32_t*)&vNumClippedVerts;
uint32_t* pPrimitiveId = (uint32_t*)&vPrimId;
- uint32_t* pViewportIdx = (uint32_t*)&vViewportIdx;
const simdscalari vOffsets = _mm256_set_epi32(
0 * sizeof(simdvertex), // unused lane
@@ -697,7 +696,7 @@ public:
}
clipPa.useAlternateOffset = false;
- pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]), _simd_set1_epi32(pViewportIdx[inputPrim]));
+ pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]));
}
#else
simdvector attrib[NumVertsPerPrim];
@@ -705,7 +704,7 @@ public:
if (assemble)
{
static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };
- pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]), _simd_set1_epi32(pViewportIdx[inputPrim]));
+ pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]));
}
#endif
} while (clipPa.NextPrim());
@@ -717,7 +716,7 @@ public:
}
#if USE_SIMD16_FRONTEND
- void ClipSimd(const simd16scalar& vPrimMask, const simd16scalar& vClipMask, PA_STATE& pa, const simd16scalari& vPrimId, const simd16scalari& vViewportIdx)
+ void ClipSimd(const simd16scalar& vPrimMask, const simd16scalar& vClipMask, PA_STATE& pa, const simd16scalari& vPrimId)
{
// input/output vertex store for clipper
simd16vertex vertices[7]; // maximum 7 verts generated per triangle
@@ -817,7 +816,6 @@ public:
uint32_t* pVertexCount = (uint32_t*)&vNumClippedVerts;
uint32_t* pPrimitiveId = (uint32_t*)&vPrimId;
- uint32_t* pViewportIdx = (uint32_t*)&vViewportIdx;
const simdscalari vOffsets = _simd_set_epi32(
0 * sizeof(simd16vertex), // unused lane
@@ -928,7 +926,7 @@ public:
static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff };
clipPa.useAlternateOffset = false;
- pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd16_set1_epi32(pPrimitiveId[inputPrim]), _simd16_set1_epi32(pViewportIdx[inputPrim]));
+ pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd16_set1_epi32(pPrimitiveId[inputPrim]));
}
} while (clipPa.NextPrim());
@@ -945,7 +943,7 @@ public:
#endif
// execute the clipper stage
- void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
+ void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId)
{
SWR_ASSERT(this->pDC != nullptr);
SWR_CONTEXT* pContext = this->pDC->pContext;
@@ -973,6 +971,20 @@ public:
// update clipper invocations pipeline stat
uint32_t numInvoc = _mm_popcnt_u32(primMask);
UPDATE_STAT_FE(CInvocations, numInvoc);
+
+ // Read back viewport index if required
+ simdscalari viewportIdx = _simd_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simdvector vpiAttrib[NumVertsPerPrim];
+ pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+
+ // OOB indices => forced to zero.
+ simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd_and_si(vClearMask, vpai);
+ }
ComputeClipCodes(prim, viewportIdx);
@@ -1001,7 +1013,7 @@ public:
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
- ClipSimd(vMask(primMask), vMask(clipMask), pa, primId, viewportIdx);
+ ClipSimd(vMask(primMask), vMask(clipMask), pa, primId);
AR_END(FEGuardbandClip, 1);
}
else if (validMask)
@@ -1010,12 +1022,12 @@ public:
UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
- pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
+ pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
}
}
#if USE_SIMD16_FRONTEND
- void ExecuteStage(PA_STATE& pa, simd16vector prim[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
+ void ExecuteStage(PA_STATE& pa, simd16vector prim[], uint32_t primMask, simd16scalari primId)
{
SWR_ASSERT(pa.pDC != nullptr);
SWR_CONTEXT* pContext = pa.pDC->pContext;
@@ -1043,6 +1055,19 @@ public:
uint32_t numInvoc = _mm_popcnt_u32(primMask);
UPDATE_STAT_FE(CInvocations, numInvoc);
+ // Read back viewport index if required
+ simd16scalari viewportIdx = _simd16_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simd16vector vpiAttrib[NumVertsPerPrim];
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd16_and_si(vClearMask, vpai);
+ }
ComputeClipCodes(prim, viewportIdx);
// cull prims with NAN coords
@@ -1070,7 +1095,7 @@ public:
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
- ClipSimd(vMask16(primMask), vMask16(clipMask), pa, primId, viewportIdx);
+ ClipSimd(vMask16(primMask), vMask16(clipMask), pa, primId);
AR_END(FEGuardbandClip, 1);
}
else if (validMask)
@@ -1079,7 +1104,7 @@ public:
UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
- pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
+ pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
}
}
@@ -1854,12 +1879,12 @@ private:
// pipeline stage functions
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
#if USE_SIMD16_FRONTEND
-void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
-void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
-void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
+void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId);
+void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId);
+void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId);
#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index f60ddfd77ef..81bf9ff7114 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -214,12 +214,12 @@ struct PA_STATE;
// function signature for pipeline stages that execute after primitive assembly
typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
- uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
+ uint32_t primMask, simdscalari primID);
#if ENABLE_AVX512_SIMD16
// function signature for pipeline stages that execute after primitive assembly
typedef void(SIMDAPI *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[],
- uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
+ uint32_t primMask, simd16scalari primID);
#endif
OSALIGNLINE(struct) API_STATE
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index c11a35acd40..1cd166d83ff 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -950,48 +950,11 @@ static void GeometryShaderStage(
#if USE_SIMD16_FRONTEND
simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]);
- // use viewport array index if GS declares it as an output attribute. Otherwise use index 0.
- simd16scalari vViewPortIdx;
- if (state.gsState.emitsViewportArrayIndex)
- {
- simd16vector vpiAttrib[3];
- gsPa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
-
- // OOB indices => forced to zero.
- simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
- simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
- vViewPortIdx = _simd16_and_si(vClearMask, vpai);
- }
- else
- {
- vViewPortIdx = _simd16_set1_epi32(0);
- }
-
gsPa.useAlternateOffset = false;
- pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
+ pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId);
#else
simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
-
- // use viewport array index if GS declares it as an output attribute. Otherwise use index 0.
- simdscalari vViewPortIdx;
- if (state.gsState.emitsViewportArrayIndex)
- {
- simdvector vpiAttrib[3];
- gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
- simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
-
- // OOB indices => forced to zero.
- simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
- vViewPortIdx = _simd_and_si(vClearMask, vpai);
- }
- else
- {
- vViewPortIdx = _simd_set1_epi32(0);
- }
-
- pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
+ pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId);
#endif
}
}
@@ -1340,10 +1303,10 @@ static void TessellationStages(
SWR_ASSERT(pfnClipFunc);
#if USE_SIMD16_FRONTEND
tessPa.useAlternateOffset = false;
- pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, _simd16_set1_epi32(0));
+ pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID);
#else
pfnClipFunc(pDC, tessPa, workerId, prim,
- GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), _simd_set1_epi32(0));
+ GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID));
#endif
}
}
@@ -1702,7 +1665,7 @@ void ProcessDraw(
SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
pa.useAlternateOffset = false;
- pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, _simd16_setzero_si());
+ pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID);
}
}
}
@@ -1864,7 +1827,7 @@ void ProcessDraw(
SWR_ASSERT(pDC->pState->pfnProcessPrims);
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
- GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), _simd_set1_epi32(0));
+ GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID));
}
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index 65b7f02813f..3c2361e85dd 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -388,10 +388,10 @@ PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative);
#endif
struct PA_STATE_BASE; // forward decl
-void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
-void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
+void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
+void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
#if USE_SIMD16_FRONTEND
-void SIMDAPI BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
-void SIMDAPI BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
+void SIMDAPI BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID);
+void SIMDAPI BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID);
#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 94a507139c1..2440d445728 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -710,9 +710,6 @@ struct SWR_GS_STATE
// instance count
uint32_t instanceCount;
- // geometry shader emits ViewportArrayIndex
- bool emitsViewportArrayIndex;
-
// if true, geometry shader emits a single stream, with separate cut buffer.
// if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
// to map vertices to streams
@@ -1049,6 +1046,7 @@ struct SWR_BACKEND_STATE
SWR_ATTRIB_SWIZZLE swizzleMap[32];
bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
+ bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
};
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index f4029be89aa..dfc54fa7125 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -547,8 +547,6 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
- pGS->emitsViewportArrayIndex = info->writes_viewport_index;
-
// XXX: single stream for now...
pGS->isSingleStream = true;
pGS->singleStreamID = 0;
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 19d961f05ae..c87393c57f1 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1755,12 +1755,12 @@ swr_update_derived(struct pipe_context *pipe,
(ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
- if (ctx->gs)
- backendState.readRenderTargetArrayIndex =
- ctx->gs->info.base.writes_layer;
- else
- backendState.readRenderTargetArrayIndex =
- ctx->vs->info.base.writes_layer;
+ struct tgsi_shader_info *pLastFE =
+ ctx->gs ?
+ &ctx->gs->info.base :
+ &ctx->vs->info.base;
+ backendState.readRenderTargetArrayIndex = pLastFE->writes_layer;
+ backendState.readViewportArrayIndex = pLastFE->writes_viewport_index;
SwrSetBackendState(ctx->swrContext, &backendState);