summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Krzysztof Raszkowski <krzysztof.raszkowski@intel.com> 2019-10-29 14:50:02 +0000
committer: Jan Zielinski <jan.zielinski@intel.com> 2019-10-29 14:50:02 +0000
commit: 163d5fde06696fed2e69e000a7621087c1636749 (patch)
tree: 23c7c0c901996e06dab1b747247bf95e6a484322 /src/gallium/drivers
parent: 44971b84b70b35e260b09493b6f75304cd58965f (diff)
gallium/swr: Enable GL_ARB_gpu_shader5: multiple streams
Added support for multiple geometry shader streams (part of the GL_ARB_gpu_shader5 extension).

Reviewed-by: Jan Zielinski <jan.zielinski@intel.com>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.cpp 4
-rw-r--r-- src/gallium/drivers/swr/swr_screen.cpp 2
-rw-r--r-- src/gallium/drivers/swr/swr_shader.cpp 75
3 files changed, 70 insertions, 11 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 13e92e8640a..ab079ab4aa0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -702,8 +702,8 @@ void ProcessStreamIdBuffer(uint32_t stream,
{
SWR_ASSERT(stream < MAX_SO_STREAMS);
- uint32_t numInputBytes = (numEmittedVerts * 2 + 7) / 8;
- uint32_t numOutputBytes = std::max(numInputBytes / 2, 1U);
+ uint32_t numInputBytes = AlignUp(numEmittedVerts * 2, 8) / 8;
+ uint32_t numOutputBytes = AlignUp(numEmittedVerts, 8) / 8;
for (uint32_t b = 0; b < numOutputBytes; ++b)
{
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
index 030b62a15ae..6c596a463b4 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -191,7 +191,7 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
return 1024;
case PIPE_CAP_MAX_VERTEX_STREAMS:
- return 1;
+ return 4;
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
return 2048;
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index c8e34b8adb8..e5e5411fb10 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -251,7 +251,8 @@ struct BuilderSWR : public Builder {
swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
struct lp_build_context * bld,
LLVMValueRef (*outputs)[4],
- LLVMValueRef emitted_vertices_vec);
+ LLVMValueRef emitted_vertices_vec,
+ LLVMValueRef stream_id);
void
swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
@@ -306,13 +307,15 @@ static void
swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
struct lp_build_context * bld,
LLVMValueRef (*outputs)[4],
- LLVMValueRef emitted_vertices_vec)
+ LLVMValueRef emitted_vertices_vec,
+ LLVMValueRef stream_id)
{
swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld,
outputs,
- emitted_vertices_vec);
+ emitted_vertices_vec,
+ stream_id);
}
static void
@@ -411,12 +414,12 @@ void
BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
struct lp_build_context * bld,
LLVMValueRef (*outputs)[4],
- LLVMValueRef emitted_vertices_vec)
+ LLVMValueRef emitted_vertices_vec,
+ LLVMValueRef stream_id)
{
swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE;
const uint32_t attribSize = 4 * sizeof(float);
const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS;
@@ -478,6 +481,49 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
}
}
+ /* When the output type is not points, the geometry shader may not
+ * output data to multiple streams. So early exit here.
+ */
+ if(iface->pGsState->outputTopology != TOP_POINT_LIST) {
+ STACKRESTORE(pStack);
+ return;
+ }
+
+ // The stream id of each vertex
+ // is encoded in 2 bits (4 vertices per byte "box"):
+ // ----------------- ----------------- ----
+ // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
+ // ----------------- ----------------- ----
+
+ // Calculate where the stream id for the current vertex
+ // needs to be put in its 1-byte "box".
+ Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2));
+
+ // Calculate which box the stream id for the current vertex goes into.
+ Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2));
+
+ // Skip count header
+ Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE));
+
+ for (uint32_t lane = 0; lane < mVWidth; ++lane) {
+ Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty);
+ Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
+
+ Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane)));
+
+ // Make sure the stream id does not exceed the maximum - stream id = (0,1,2,3)
+ Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty);
+
+ // Shift it to correct position in byte "box"
+ vVal = SHL(vVal, pShift);
+
+ // Info about other vertices may already be stored,
+ // so we need to read it and add the bits for the current vertex.
+ Value *storedValue = LOAD(pStreamOffset);
+ vVal = OR(storedValue, vVal);
+ STORE(vVal, pStreamOffset);
+ }
+
STACKRESTORE(pStack);
}
@@ -491,6 +537,15 @@ BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
{
swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
+ /* When the output type is points, the geometry shader may output data
+ * to multiple streams, and end_primitive has no effect. Info about
+ * stream id for vertices is stored into the same place in memory where
+ * end primitive info is stored so early exit in this case.
+ */
+ if (iface->pGsState->outputTopology == TOP_POINT_LIST) {
+ return;
+ }
+
IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask });
@@ -569,9 +624,13 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
- // XXX: single stream for now...
- pGS->isSingleStream = true;
- pGS->singleStreamID = 0;
+ // If the output topology is a point list, assume multiple streams are used
+ if(pGS->outputTopology == TOP_POINT_LIST) {
+ pGS->isSingleStream = false;
+ } else {
+ pGS->isSingleStream = true;
+ pGS->singleStreamID = 0;
+ }
pGS->vertexAttribOffset = VERTEX_POSITION_SLOT;
pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;