summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marek Olšák <marek.olsak@amd.com> 2021-01-11 14:52:00 -0500
committer Marge Bot <eric+marge@anholt.net> 2021-01-22 16:45:30 +0000
commit 59a478b84346fd83f13ae608c852e2be6bcf49f3 (patch)
tree 1ac0562ff8460988e75bb08a8f4bfb0d4ac38b95
parent 5013828863010a1feded43c20d74250c48eff757 (diff)
radeonsi: move if (sctx->vertex_buffers_dirty) into the upload function
This looks unnecessary, but the next commit will build upon it and add more stuff into the function.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8653>
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.cpp 166
1 files changed, 84 insertions, 82 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 7921c97d9eb..ae0878b111b 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -1327,100 +1327,104 @@ void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx)
template <chip_class GFX_VERSION> ALWAYS_INLINE
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
- struct si_vertex_elements *velems = sctx->vertex_elements;
- unsigned alloc_size = velems->vb_desc_list_alloc_size;
- uint32_t *ptr;
+ if (sctx->vertex_buffers_dirty) {
+ unsigned count = sctx->num_vertex_elements;
+ assert(count);
+ assert(count <= SI_MAX_ATTRIBS);
+
+ struct si_vertex_elements *velems = sctx->vertex_elements;
+ unsigned alloc_size = velems->vb_desc_list_alloc_size;
+ uint32_t *ptr;
+
+ if (alloc_size) {
+ /* Vertex buffer descriptors are the only ones which are uploaded
+ * directly through a staging buffer and don't go through
+ * the fine-grained upload path.
+ */
+ u_upload_alloc(sctx->b.const_uploader, 0, alloc_size,
+ si_optimal_tcc_alignment(sctx, alloc_size), &sctx->vb_descriptors_offset,
+ (struct pipe_resource **)&sctx->vb_descriptors_buffer, (void **)&ptr);
+ if (!sctx->vb_descriptors_buffer) {
+ sctx->vb_descriptors_offset = 0;
+ sctx->vb_descriptors_gpu_list = NULL;
+ return false;
+ }
- if (alloc_size) {
- /* Vertex buffer descriptors are the only ones which are uploaded
- * directly through a staging buffer and don't go through
- * the fine-grained upload path.
- */
- u_upload_alloc(sctx->b.const_uploader, 0, alloc_size,
- si_optimal_tcc_alignment(sctx, alloc_size), &sctx->vb_descriptors_offset,
- (struct pipe_resource **)&sctx->vb_descriptors_buffer, (void **)&ptr);
- if (!sctx->vb_descriptors_buffer) {
- sctx->vb_descriptors_offset = 0;
- sctx->vb_descriptors_gpu_list = NULL;
- return false;
+ sctx->vb_descriptors_gpu_list = ptr;
+ radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->vb_descriptors_buffer,
+ RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
+ sctx->vertex_buffer_pointer_dirty = true;
+ sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
+ } else {
+ si_resource_reference(&sctx->vb_descriptors_buffer, NULL);
+ sctx->vertex_buffer_pointer_dirty = false;
+ sctx->prefetch_L2_mask &= ~SI_PREFETCH_VBO_DESCRIPTORS;
}
- sctx->vb_descriptors_gpu_list = ptr;
- radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->vb_descriptors_buffer, RADEON_USAGE_READ,
- RADEON_PRIO_DESCRIPTORS);
- sctx->vertex_buffer_pointer_dirty = true;
- sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
- } else {
- si_resource_reference(&sctx->vb_descriptors_buffer, NULL);
- sctx->vertex_buffer_pointer_dirty = false;
- sctx->prefetch_L2_mask &= ~SI_PREFETCH_VBO_DESCRIPTORS;
- }
+ unsigned first_vb_use_mask = velems->first_vb_use_mask;
+ unsigned num_vbos_in_user_sgprs = sctx->screen->num_vbos_in_user_sgprs;
- unsigned count = sctx->num_vertex_elements;
- assert(count <= SI_MAX_ATTRIBS);
+ for (unsigned i = 0; i < count; i++) {
+ struct pipe_vertex_buffer *vb;
+ struct si_resource *buf;
+ unsigned vbo_index = velems->vertex_buffer_index[i];
+ uint32_t *desc = i < num_vbos_in_user_sgprs ? &sctx->vb_descriptor_user_sgprs[i * 4]
+ : &ptr[(i - num_vbos_in_user_sgprs) * 4];
- unsigned first_vb_use_mask = velems->first_vb_use_mask;
- unsigned num_vbos_in_user_sgprs = sctx->screen->num_vbos_in_user_sgprs;
+ vb = &sctx->vertex_buffer[vbo_index];
+ buf = si_resource(vb->buffer.resource);
+ if (!buf) {
+ memset(desc, 0, 16);
+ continue;
+ }
- for (unsigned i = 0; i < count; i++) {
- struct pipe_vertex_buffer *vb;
- struct si_resource *buf;
- unsigned vbo_index = velems->vertex_buffer_index[i];
- uint32_t *desc = i < num_vbos_in_user_sgprs ? &sctx->vb_descriptor_user_sgprs[i * 4]
- : &ptr[(i - num_vbos_in_user_sgprs) * 4];
+ int64_t offset = (int64_t)((int)vb->buffer_offset) + velems->src_offset[i];
- vb = &sctx->vertex_buffer[vbo_index];
- buf = si_resource(vb->buffer.resource);
- if (!buf) {
- memset(desc, 0, 16);
- continue;
- }
+ if (offset >= buf->b.b.width0) {
+ assert(offset < buf->b.b.width0);
+ memset(desc, 0, 16);
+ continue;
+ }
- int64_t offset = (int64_t)((int)vb->buffer_offset) + velems->src_offset[i];
+ uint64_t va = buf->gpu_address + offset;
- if (offset >= buf->b.b.width0) {
- assert(offset < buf->b.b.width0);
- memset(desc, 0, 16);
- continue;
- }
+ int64_t num_records = (int64_t)buf->b.b.width0 - offset;
+ if (GFX_VERSION != GFX8 && vb->stride) {
+ /* Round up by rounding down and adding 1 */
+ num_records = (num_records - velems->format_size[i]) / vb->stride + 1;
+ }
+ assert(num_records >= 0 && num_records <= UINT_MAX);
- uint64_t va = buf->gpu_address + offset;
+ uint32_t rsrc_word3 = velems->rsrc_word3[i];
- int64_t num_records = (int64_t)buf->b.b.width0 - offset;
- if (GFX_VERSION != GFX8 && vb->stride) {
- /* Round up by rounding down and adding 1 */
- num_records = (num_records - velems->format_size[i]) / vb->stride + 1;
+ /* OOB_SELECT chooses the out-of-bounds check:
+ * - 1: index >= NUM_RECORDS (Structured)
+ * - 3: offset >= NUM_RECORDS (Raw)
+ */
+ if (GFX_VERSION >= GFX10)
+ rsrc_word3 |= S_008F0C_OOB_SELECT(vb->stride ? V_008F0C_OOB_SELECT_STRUCTURED
+ : V_008F0C_OOB_SELECT_RAW);
+
+ desc[0] = va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(vb->stride);
+ desc[2] = num_records;
+ desc[3] = rsrc_word3;
+
+ if (first_vb_use_mask & (1 << i)) {
+ radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(vb->buffer.resource),
+ RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
+ }
}
- assert(num_records >= 0 && num_records <= UINT_MAX);
- uint32_t rsrc_word3 = velems->rsrc_word3[i];
-
- /* OOB_SELECT chooses the out-of-bounds check:
- * - 1: index >= NUM_RECORDS (Structured)
- * - 3: offset >= NUM_RECORDS (Raw)
- */
- if (GFX_VERSION >= GFX10)
- rsrc_word3 |= S_008F0C_OOB_SELECT(vb->stride ? V_008F0C_OOB_SELECT_STRUCTURED
- : V_008F0C_OOB_SELECT_RAW);
-
- desc[0] = va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(vb->stride);
- desc[2] = num_records;
- desc[3] = rsrc_word3;
-
- if (first_vb_use_mask & (1 << i)) {
- radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(vb->buffer.resource),
- RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
- }
+ /* Don't flush the const cache. It would have a very negative effect
+ * on performance (confirmed by testing). New descriptors are always
+ * uploaded to a fresh new buffer, so I don't think flushing the const
+ * cache is needed. */
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
+ sctx->vertex_buffer_user_sgprs_dirty = num_vbos_in_user_sgprs > 0;
+ sctx->vertex_buffers_dirty = false;
}
- /* Don't flush the const cache. It would have a very negative effect
- * on performance (confirmed by testing). New descriptors are always
- * uploaded to a fresh new buffer, so I don't think flushing the const
- * cache is needed. */
- si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
- sctx->vertex_buffer_user_sgprs_dirty = num_vbos_in_user_sgprs > 0;
- sctx->vertex_buffers_dirty = false;
return true;
}
@@ -2033,9 +2037,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
si_gfx_resources_add_all_to_bo_list(sctx);
if (unlikely(!si_upload_graphics_shader_descriptors(sctx) ||
- (sctx->vertex_buffers_dirty &&
- sctx->num_vertex_elements &&
- !si_upload_vertex_buffer_descriptors<GFX_VERSION>(sctx)))) {
+ !si_upload_vertex_buffer_descriptors<GFX_VERSION>(sctx))) {
DRAW_CLEANUP;
return;
}