diff options
Diffstat (limited to 'src/gallium/auxiliary/util/u_vbuf.c')
-rw-r--r-- | src/gallium/auxiliary/util/u_vbuf.c | 854 |
1 files changed, 472 insertions, 382 deletions
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index ef28c628da9..9809bcc2388 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -92,10 +92,10 @@ #include "util/u_helpers.h" #include "util/u_inlines.h" #include "util/u_memory.h" -#include "indices/u_primconvert.h" #include "util/u_prim_restart.h" #include "util/u_screen.h" #include "util/u_upload_mgr.h" +#include "indices/u_primconvert.h" #include "translate/translate.h" #include "translate/translate_cache.h" #include "cso_cache/cso_cache.h" @@ -112,6 +112,9 @@ struct u_vbuf_elements { * its vertex data must be translated to native_format[i]. */ enum pipe_format native_format[PIPE_MAX_ATTRIBS]; unsigned native_format_size[PIPE_MAX_ATTRIBS]; + unsigned component_size[PIPE_MAX_ATTRIBS]; + /* buffer-indexed */ + unsigned strides[PIPE_MAX_ATTRIBS]; /* Which buffers are used by the vertex element state. */ uint32_t used_vb_mask; @@ -127,6 +130,7 @@ struct u_vbuf_elements { /* Which buffer has at least one vertex element referencing it * compatible. */ uint32_t compatible_vb_mask_any; + uint32_t vb_align_mask[2]; //which buffers require 2/4 byte alignments /* Which buffer has all vertex elements referencing it compatible. */ uint32_t compatible_vb_mask_all; @@ -137,6 +141,12 @@ struct u_vbuf_elements { /* Which buffers are used by multiple vertex attribs. */ uint32_t interleaved_vb_mask; + /* Which buffer has a non-zero stride. */ + uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */ + + /* Which buffer is incompatible (unaligned). */ + uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */ + void *driver_cso; }; @@ -161,13 +171,16 @@ struct u_vbuf { /* This is what was set in set_vertex_buffers. * May contain user buffers. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + uint8_t num_vertex_buffers; + uint8_t num_real_vertex_buffers; + bool vertex_buffers_dirty; uint32_t enabled_vb_mask; + uint32_t unaligned_vb_mask[2]; //16/32bit + /* Vertex buffers for the driver. * There are usually no user buffers. */ struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS]; - uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last - call of set_vertex_buffers */ /* Vertex elements. */ struct u_vbuf_elements *ve, *ve_saved; @@ -176,7 +189,7 @@ struct u_vbuf { struct cso_velems_state fallback_velems; /* If non-NULL, this is a vertex element state used for the translate * fallback and therefore used for rendering too. */ - boolean using_translate; + bool using_translate; /* The vertex buffer slot index where translated vertices have been * stored in. */ unsigned fallback_vbs[VB_NUM]; @@ -186,8 +199,6 @@ struct u_vbuf { uint32_t user_vb_mask; /* each bit describes a corresp. buffer */ /* Which buffer is incompatible (unaligned). */ uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */ - /* Which buffer has a non-zero stride. */ - uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */ /* Which buffers are allowed (supported by hardware). */ uint32_t allowed_vb_mask; }; @@ -303,6 +314,11 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps, caps->velem_src_offset_unaligned = !screen->get_param(screen, PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY); + caps->attrib_component_unaligned = + !screen->get_param(screen, + PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY); + assert(caps->attrib_component_unaligned || + (caps->velem_src_offset_unaligned && caps->buffer_stride_unaligned && caps->buffer_offset_unaligned)); caps->user_vertex_buffers = screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS); caps->max_vertex_buffers = @@ -312,13 +328,13 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps, screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX)) { caps->rewrite_restart_index = screen->get_param(screen, PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART); caps->supported_restart_modes = screen->get_param(screen, PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART); - caps->supported_restart_modes |= BITFIELD_BIT(PIPE_PRIM_PATCHES); - if (caps->supported_restart_modes != BITFIELD_MASK(PIPE_PRIM_MAX)) + caps->supported_restart_modes |= BITFIELD_BIT(MESA_PRIM_PATCHES); + if (caps->supported_restart_modes != BITFIELD_MASK(MESA_PRIM_COUNT)) caps->fallback_always = true; caps->fallback_always |= caps->rewrite_restart_index; } caps->supported_prim_modes = screen->get_param(screen, PIPE_CAP_SUPPORTED_PRIM_MODES); - if (caps->supported_prim_modes != BITFIELD_MASK(PIPE_PRIM_MAX)) + if (caps->supported_prim_modes != BITFIELD_MASK(MESA_PRIM_COUNT)) caps->fallback_always = true; if (!screen->is_format_supported(screen, PIPE_FORMAT_R8_UINT, PIPE_BUFFER, 0, 0, PIPE_BIND_INDEX_BUFFER)) @@ -330,6 +346,7 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps, if (!caps->buffer_offset_unaligned || !caps->buffer_stride_unaligned || + !caps->attrib_component_unaligned || !caps->velem_src_offset_unaligned) caps->fallback_always = true; @@ -346,8 +363,8 @@ u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps) mgr->pipe = pipe; if (caps->rewrite_ubyte_ibs || caps->rewrite_restart_index || /* require all but patches */ - ((caps->supported_prim_modes & caps->supported_restart_modes & BITFIELD_MASK(PIPE_PRIM_MAX))) != - BITFIELD_MASK(PIPE_PRIM_MAX)) { + ((caps->supported_prim_modes & caps->supported_restart_modes & BITFIELD_MASK(MESA_PRIM_COUNT))) != + BITFIELD_MASK(MESA_PRIM_COUNT)) { struct primconvert_config cfg; cfg.fixed_prim_restart = caps->rewrite_restart_index; cfg.primtypes_mask = caps->supported_prim_modes; @@ -383,9 +400,9 @@ u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, /* need to include the count into the stored state data too. */ key_size = sizeof(struct pipe_vertex_element) * velems->count + sizeof(unsigned); - hash_key = cso_construct_key((void*)velems, key_size); + hash_key = cso_construct_key(velems, key_size); iter = cso_find_state_template(&mgr->cso_cache, hash_key, CSO_VELEMENTS, - (void*)velems, key_size); + velems, key_size); if (cso_hash_iter_is_null(iter)) { struct cso_velements *cso = MALLOC_STRUCT(cso_velements); @@ -425,12 +442,9 @@ void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr) void u_vbuf_destroy(struct u_vbuf *mgr) { - struct pipe_screen *screen = mgr->pipe->screen; unsigned i; - const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX, - PIPE_SHADER_CAP_MAX_INPUTS); - mgr->pipe->set_vertex_buffers(mgr->pipe, 0, 0, num_vb, false, NULL); + mgr->pipe->set_vertex_buffers(mgr->pipe, 0, NULL); for (i = 0; i < PIPE_MAX_ATTRIBS; i++) pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]); @@ -451,7 +465,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, const struct pipe_draw_start_count_bias *draw, unsigned vb_mask, unsigned out_vb, int start_vertex, unsigned num_vertices, - int min_index, boolean unroll_indices) + int min_index, bool unroll_indices) { struct translate *tr; struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; @@ -469,14 +483,15 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, unsigned offset; uint8_t *map; unsigned i = u_bit_scan(&mask); + unsigned stride = mgr->ve->strides[i]; vb = &mgr->vertex_buffer[i]; - offset = vb->buffer_offset + vb->stride * start_vertex; + offset = vb->buffer_offset + stride * start_vertex; if (vb->is_user_buffer) { map = (uint8_t*)vb->buffer.user + offset; } else { - unsigned size = vb->stride ? num_vertices * vb->stride + unsigned size = stride ? num_vertices * stride : sizeof(double)*4; if (!vb->buffer.resource) { @@ -485,15 +500,16 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, continue; } - if (vb->stride) { + if (stride) { /* the stride cannot be used to calculate the map size of the buffer, * as it only determines the bytes between elements, not the size of elements * themselves, meaning that if stride < element_size, the mapped size will * be too small and conversion will overrun the map buffer * - * instead, add the size of the largest possible attribute to ensure the map is large enough + * instead, add the size of the largest possible attribute to the final attribute's offset + * in order to ensure the map is large enough */ - unsigned last_offset = offset + size - vb->stride; + unsigned last_offset = size - stride; size = MAX2(size, last_offset + sizeof(double)*4); } @@ -512,7 +528,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, * crashing (by reading past the end of a hardware buffer mapping) * when people do that. */ - num_vertices = (size + vb->stride - 1) / vb->stride; + num_vertices = (size + stride - 1) / stride; } map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size, @@ -521,10 +537,10 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, /* Subtract min_index so that indexing with the index buffer works. */ if (unroll_indices) { - map -= (ptrdiff_t)vb->stride * min_index; + map -= (ptrdiff_t)stride * min_index; } - tr->set_buffer(tr, i, map, vb->stride, info->max_index); + tr->set_buffer(tr, i, map, stride, info->max_index); } /* Translate. */ @@ -592,7 +608,6 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, /* Setup the new vertex buffer. */ mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset; - mgr->real_vertex_buffer[out_vb].stride = key->output_stride; /* Move the buffer reference. */ pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]); @@ -602,7 +617,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, return PIPE_OK; } -static boolean +static bool u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, unsigned mask[VB_NUM]) { @@ -610,14 +625,14 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, unsigned fallback_vbs[VB_NUM]; /* Set the bit for each buffer which is incompatible, or isn't set. */ uint32_t unused_vb_mask = - mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask | + mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask | mgr->ve->incompatible_vb_mask | ~mgr->enabled_vb_mask; uint32_t unused_vb_mask_orig; - boolean insufficient_buffers = false; + bool insufficient_buffers = false; /* No vertex buffers available at all */ if (!unused_vb_mask) - return FALSE; + return false; memset(fallback_vbs, ~0, sizeof(fallback_vbs)); mgr->fallback_vbs_mask = 0; @@ -655,26 +670,29 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, for (type = 0; type < VB_NUM; type++) { if (mask[type]) { - mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type]; + mgr->num_real_vertex_buffers = + MAX2(mgr->num_real_vertex_buffers, fallback_vbs[type] + 1); + mgr->vertex_buffers_dirty = true; } } memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs)); - return TRUE; + return true; } -static boolean +static bool u_vbuf_translate_begin(struct u_vbuf *mgr, const struct pipe_draw_info *info, const struct pipe_draw_start_count_bias *draw, int start_vertex, unsigned num_vertices, - int min_index, boolean unroll_indices) + int min_index, bool unroll_indices, + uint32_t misaligned) { unsigned mask[VB_NUM] = {0}; struct translate_key key[VB_NUM]; unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */ unsigned i, type; - const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask & + const unsigned incompatible_vb_mask = (misaligned | mgr->incompatible_vb_mask | mgr->ve->incompatible_vb_mask) & mgr->ve->used_vb_mask; const int start[VB_NUM] = { @@ -697,7 +715,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, for (i = 0; i < mgr->ve->count; i++) { unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index; - if (!mgr->vertex_buffer[vb_index].stride) { + if (!mgr->ve->ve[i].src_stride) { if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && !(incompatible_vb_mask & (1 << vb_index))) { continue; @@ -723,9 +741,10 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, /* Find free vertex buffer slots. */ if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) { - return FALSE; + return false; } + unsigned min_alignment[VB_NUM] = {0}; /* Initialize the translate keys. */ for (i = 0; i < mgr->ve->count; i++) { struct translate_key *k; @@ -764,26 +783,31 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, te->input_offset = mgr->ve->ve[i].src_offset; te->output_format = output_format; te->output_offset = k->output_stride; + unsigned adjustment = 0; + if (!mgr->caps.attrib_component_unaligned && + te->output_offset % mgr->ve->component_size[i] != 0) { + unsigned aligned = align(te->output_offset, mgr->ve->component_size[i]); + adjustment = aligned - te->output_offset; + te->output_offset = aligned; + } - k->output_stride += mgr->ve->native_format_size[i]; + k->output_stride += mgr->ve->native_format_size[i] + adjustment; k->nr_elements++; + min_alignment[type] = MAX2(min_alignment[type], mgr->ve->component_size[i]); } /* Translate buffers. */ for (type = 0; type < VB_NUM; type++) { if (key[type].nr_elements) { enum pipe_error err; + if (!mgr->caps.attrib_component_unaligned) + key[type].output_stride = align(key[type].output_stride, min_alignment[type]); err = u_vbuf_translate_buffers(mgr, &key[type], info, draw, mask[type], mgr->fallback_vbs[type], start[type], num[type], min_index, unroll_indices && type == VB_VERTEX); if (err != PIPE_OK) - return FALSE; - - /* Fixup the stride for constant attribs. */ - if (type == VB_CONST) { - mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0; - } + return false; } } @@ -797,6 +821,12 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, mgr->fallback_velems.velems[i].src_offset = te->output_offset; mgr->fallback_velems.velems[i].vertex_buffer_index = mgr->fallback_vbs[type]; + /* Fixup the stride for constant attribs. */ + if (type == VB_CONST) + mgr->fallback_velems.velems[i].src_stride = 0; + else + mgr->fallback_velems.velems[i].src_stride = key[type].output_stride; + /* elem_index[type][i] can only be set for one type. */ assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u); assert(type > VB_VERTEX || elem_index[type+2][i] == ~0u); @@ -813,8 +843,8 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, mgr->fallback_velems.count = mgr->ve->count; u_vbuf_set_vertex_elements_internal(mgr, &mgr->fallback_velems); - mgr->using_translate = TRUE; - return TRUE; + mgr->using_translate = true; + return true; } static void u_vbuf_translate_end(struct u_vbuf *mgr) @@ -823,7 +853,7 @@ static void u_vbuf_translate_end(struct u_vbuf *mgr) /* Restore vertex elements. */ mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso); - mgr->using_translate = FALSE; + mgr->using_translate = false; /* Unreference the now-unused VBOs. */ for (i = 0; i < VB_NUM; i++) { @@ -833,8 +863,11 @@ static void u_vbuf_translate_end(struct u_vbuf *mgr) mgr->fallback_vbs[i] = ~0; } } - /* This will cause the buffer to be unbound in the driver later. */ - mgr->dirty_real_vb_mask |= mgr->fallback_vbs_mask; + /* This will cause the fallback buffers above num_vertex_buffers to be + * unbound. + */ + mgr->num_real_vertex_buffers = mgr->num_vertex_buffers; + mgr->vertex_buffers_dirty = true; mgr->fallback_vbs_mask = 0; } @@ -880,14 +913,40 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, ve->native_format_size[i] = util_format_get_blocksize(ve->native_format[i]); + const struct util_format_description *desc = util_format_description(format); + bool is_packed = false; + for (unsigned c = 0; c < desc->nr_channels; c++) + is_packed |= desc->channel[c].size != desc->channel[0].size || desc->channel[c].size % 8 != 0; + unsigned component_size = is_packed ? + ve->native_format_size[i] : (ve->native_format_size[i] / desc->nr_channels); + ve->component_size[i] = component_size; + if (ve->ve[i].src_format != format || (!mgr->caps.velem_src_offset_unaligned && - ve->ve[i].src_offset % 4 != 0)) { + ve->ve[i].src_offset % 4 != 0) || + (!mgr->caps.attrib_component_unaligned && + ve->ve[i].src_offset % component_size != 0)) { ve->incompatible_elem_mask |= 1 << i; ve->incompatible_vb_mask_any |= vb_index_bit; } else { ve->compatible_vb_mask_any |= vb_index_bit; + if (component_size == 2) { + ve->vb_align_mask[0] |= vb_index_bit; + if (ve->ve[i].src_stride % 2 != 0) + ve->incompatible_vb_mask |= vb_index_bit; + } + else if (component_size == 4) { + ve->vb_align_mask[1] |= vb_index_bit; + if (ve->ve[i].src_stride % 4 != 0) + ve->incompatible_vb_mask |= vb_index_bit; + } } + ve->strides[ve->ve[i].vertex_buffer_index] = ve->ve[i].src_stride; + if (ve->ve[i].src_stride) { + ve->nonzero_stride_vb_mask |= 1 << ve->ve[i].vertex_buffer_index; + } + if (!mgr->caps.buffer_stride_unaligned && ve->ve[i].src_stride % 4 != 0) + ve->incompatible_vb_mask |= vb_index_bit; } if (used_buffers & ~mgr->allowed_vb_mask) { @@ -936,11 +995,35 @@ static void u_vbuf_delete_vertex_elements(void *ctx, void *state, } void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, - unsigned start_slot, unsigned count, - unsigned unbind_num_trailing_slots, + unsigned count, bool take_ownership, const struct pipe_vertex_buffer *bufs) { + if (!count) { + struct pipe_context *pipe = mgr->pipe; + unsigned last_count = mgr->num_vertex_buffers; + + /* Unbind. */ + mgr->num_vertex_buffers = 0; + mgr->num_real_vertex_buffers = 0; + mgr->user_vb_mask = 0; + mgr->incompatible_vb_mask = 0; + mgr->enabled_vb_mask = 0; + mgr->unaligned_vb_mask[0] = 0; + mgr->unaligned_vb_mask[1] = 0; + mgr->vertex_buffers_dirty = false; + + for (unsigned i = 0; i < last_count; i++) { + pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]); + pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]); + } + + pipe->set_vertex_buffers(pipe, 0, NULL); + return; + } + + assert(bufs); + unsigned i; /* which buffers are enabled */ uint32_t enabled_vb_mask = 0; @@ -948,40 +1031,14 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, uint32_t user_vb_mask = 0; /* which buffers are incompatible with the driver */ uint32_t incompatible_vb_mask = 0; - /* which buffers have a non-zero stride */ - uint32_t nonzero_stride_vb_mask = 0; - const uint32_t mask = - ~(((1ull << (count + unbind_num_trailing_slots)) - 1) << start_slot); - - /* Zero out the bits we are going to rewrite completely. */ - mgr->user_vb_mask &= mask; - mgr->incompatible_vb_mask &= mask; - mgr->nonzero_stride_vb_mask &= mask; - mgr->enabled_vb_mask &= mask; - - if (!bufs) { - struct pipe_context *pipe = mgr->pipe; - /* Unbind. */ - unsigned total_count = count + unbind_num_trailing_slots; - mgr->dirty_real_vb_mask &= mask; - - for (i = 0; i < total_count; i++) { - unsigned dst_index = start_slot + i; - - pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]); - pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]); - } - - pipe->set_vertex_buffers(pipe, start_slot, count, - unbind_num_trailing_slots, false, NULL); - return; - } + /* which buffers are unaligned to 2/4 bytes */ + uint32_t unaligned_vb_mask[2] = {0}; + unsigned num_identical = 0; for (i = 0; i < count; i++) { - unsigned dst_index = start_slot + i; const struct pipe_vertex_buffer *vb = &bufs[i]; - struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index]; - struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index]; + struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[i]; + struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[i]; if (!vb->buffer.resource) { pipe_vertex_buffer_unreference(orig_vb); @@ -989,6 +1046,12 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, continue; } + /* The structure has holes: do not use memcmp. */ + if (orig_vb->is_user_buffer == vb->is_user_buffer && + orig_vb->buffer_offset == vb->buffer_offset && + orig_vb->buffer.resource == vb->buffer.resource) + num_identical++; + if (take_ownership) { pipe_vertex_buffer_unreference(orig_vb); memcpy(orig_vb, vb, sizeof(*vb)); @@ -996,25 +1059,26 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, pipe_vertex_buffer_reference(orig_vb, vb); } - if (vb->stride) { - nonzero_stride_vb_mask |= 1 << dst_index; - } - enabled_vb_mask |= 1 << dst_index; + enabled_vb_mask |= 1 << i; - if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) || - (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) { - incompatible_vb_mask |= 1 << dst_index; + if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0)) { + incompatible_vb_mask |= 1 << i; real_vb->buffer_offset = vb->buffer_offset; - real_vb->stride = vb->stride; pipe_vertex_buffer_unreference(real_vb); real_vb->is_user_buffer = false; continue; } + if (!mgr->caps.attrib_component_unaligned) { + if (vb->buffer_offset % 2 != 0) + unaligned_vb_mask[0] |= BITFIELD_BIT(i); + if (vb->buffer_offset % 4 != 0) + unaligned_vb_mask[1] |= BITFIELD_BIT(i); + } + if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) { - user_vb_mask |= 1 << dst_index; + user_vb_mask |= 1 << i; real_vb->buffer_offset = vb->buffer_offset; - real_vb->stride = vb->stride; pipe_vertex_buffer_unreference(real_vb); real_vb->is_user_buffer = false; continue; @@ -1023,21 +1087,24 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, pipe_vertex_buffer_reference(real_vb, vb); } - for (i = 0; i < unbind_num_trailing_slots; i++) { - unsigned dst_index = start_slot + count + i; + unsigned last_count = mgr->num_vertex_buffers; - pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]); - pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]); - } + if (num_identical == count && count == last_count) + return; - mgr->user_vb_mask |= user_vb_mask; - mgr->incompatible_vb_mask |= incompatible_vb_mask; - mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask; - mgr->enabled_vb_mask |= enabled_vb_mask; + for (; i < last_count; i++) { + pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]); + pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]); + } - /* All changed buffers are marked as dirty, even the NULL ones, - * which will cause the NULL buffers to be unbound in the driver later. */ - mgr->dirty_real_vb_mask |= ~mask; + mgr->num_vertex_buffers = count; + mgr->num_real_vertex_buffers = count; + mgr->user_vb_mask = user_vb_mask; + mgr->incompatible_vb_mask = incompatible_vb_mask; + mgr->enabled_vb_mask = enabled_vb_mask; + mgr->unaligned_vb_mask[0] = unaligned_vb_mask[0]; + mgr->unaligned_vb_mask[1] = unaligned_vb_mask[1]; + mgr->vertex_buffers_dirty = true; } static ALWAYS_INLINE bool @@ -1057,7 +1124,7 @@ get_upload_offset_size(struct u_vbuf *mgr, unsigned instance_div = velem->instance_divisor; *offset = vb->buffer_offset + velem->src_offset; - if (!vb->stride) { + if (!velem->src_stride) { /* Constant attrib. */ *size = ve->src_format_size[velem_index]; } else if (instance_div) { @@ -1072,12 +1139,12 @@ get_upload_offset_size(struct u_vbuf *mgr, if (count * instance_div != num_instances) count++; - *offset += vb->stride * start_instance; - *size = vb->stride * (count - 1) + ve->src_format_size[velem_index]; + *offset += velem->src_stride * start_instance; + *size = velem->src_stride * (count - 1) + ve->src_format_size[velem_index]; } else { /* Per-vertex attrib. */ - *offset += vb->stride * start_vertex; - *size = vb->stride * (num_vertices - 1) + ve->src_format_size[velem_index]; + *offset += velem->src_stride * start_vertex; + *size = velem->src_stride * (num_vertices - 1) + ve->src_format_size[velem_index]; } return true; } @@ -1183,20 +1250,21 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr, return PIPE_OK; } -static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr) +static bool u_vbuf_need_minmax_index(const struct u_vbuf *mgr, uint32_t misaligned) { /* See if there are any per-vertex attribs which will be uploaded or * translated. Use bitmasks to get the info instead of looping over vertex * elements. */ return (mgr->ve->used_vb_mask & ((mgr->user_vb_mask | - mgr->incompatible_vb_mask | + mgr->incompatible_vb_mask | mgr->ve->incompatible_vb_mask | + misaligned | mgr->ve->incompatible_vb_mask_any) & mgr->ve->noninstance_vb_mask_any & - mgr->nonzero_stride_vb_mask)) != 0; + mgr->ve->nonzero_stride_vb_mask)) != 0; } -static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr) +static bool u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr, uint32_t misaligned) { /* Return true if there are hw buffers which don't need to be translated. * @@ -1205,9 +1273,11 @@ static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr) return (mgr->ve->used_vb_mask & (~mgr->user_vb_mask & ~mgr->incompatible_vb_mask & + ~mgr->ve->incompatible_vb_mask & + ~misaligned & mgr->ve->compatible_vb_mask_all & mgr->ve->noninstance_vb_mask_any & - mgr->nonzero_stride_vb_mask)) != 0; + mgr->ve->nonzero_stride_vb_mask)) != 0; } static void @@ -1323,31 +1393,27 @@ void u_vbuf_get_minmax_index(struct pipe_context *pipe, static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr) { struct pipe_context *pipe = mgr->pipe; - unsigned start_slot, count; + unsigned count = mgr->num_real_vertex_buffers; - start_slot = ffs(mgr->dirty_real_vb_mask) - 1; - count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot); + assert(mgr->vertex_buffers_dirty); - if (mgr->dirty_real_vb_mask == mgr->enabled_vb_mask && - mgr->dirty_real_vb_mask == mgr->user_vb_mask) { + if (mgr->user_vb_mask == BITFIELD_MASK(count)) { /* Fast path that allows us to transfer the VBO references to the driver * to skip atomic reference counting there. These are freshly uploaded * user buffers that can be discarded after this call. */ - pipe->set_vertex_buffers(pipe, start_slot, count, 0, true, - mgr->real_vertex_buffer + start_slot); + pipe->set_vertex_buffers(pipe, count, mgr->real_vertex_buffer); /* We don't own the VBO references now. Set them to NULL. */ for (unsigned i = 0; i < count; i++) { - assert(!mgr->real_vertex_buffer[start_slot + i].is_user_buffer); - mgr->real_vertex_buffer[start_slot + i].buffer.resource = NULL; + assert(!mgr->real_vertex_buffer[i].is_user_buffer); + mgr->real_vertex_buffer[i].buffer.resource = NULL; } } else { /* Slow path where we have to keep VBO references. */ - pipe->set_vertex_buffers(pipe, start_slot, count, 0, false, - mgr->real_vertex_buffer + start_slot); + util_set_vertex_buffers(pipe, count, false, mgr->real_vertex_buffer); } - mgr->dirty_real_vb_mask = 0; + mgr->vertex_buffers_dirty = false; } static void @@ -1374,28 +1440,34 @@ u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info, draw.index_bias = indirect_data[offset + 3]; info->start_instance = indirect_data[offset + 4]; - u_vbuf_draw_vbo(mgr, info, drawid_offset, NULL, draw); + u_vbuf_draw_vbo(mgr->pipe, info, drawid_offset, NULL, &draw, 1); } } -void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, +void u_vbuf_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias draw) + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) { - struct pipe_context *pipe = mgr->pipe; + struct u_vbuf *mgr = pipe->vbuf; int start_vertex; unsigned min_index; unsigned num_vertices; - boolean unroll_indices = FALSE; + bool unroll_indices = false; const uint32_t used_vb_mask = mgr->ve->used_vb_mask; uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask; - const uint32_t incompatible_vb_mask = - mgr->incompatible_vb_mask & used_vb_mask; - struct pipe_draw_info new_info; - struct pipe_draw_start_count_bias new_draw; unsigned fixed_restart_index = info->index_size ? util_prim_restart_index_from_size(info->index_size) : 0; + uint32_t misaligned = 0; + if (!mgr->caps.attrib_component_unaligned) { + for (unsigned i = 0; i < ARRAY_SIZE(mgr->unaligned_vb_mask); i++) { + misaligned |= mgr->ve->vb_align_mask[i] & mgr->unaligned_vb_mask[i]; + } + } + const uint32_t incompatible_vb_mask = + (mgr->incompatible_vb_mask | mgr->ve->incompatible_vb_mask | misaligned) & used_vb_mask; + /* Normal draw. No fallback and no user buffers. */ if (!incompatible_vb_mask && !mgr->ve->incompatible_elem_mask && @@ -1408,291 +1480,309 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, mgr->caps.supported_prim_modes & BITFIELD_BIT(info->mode)) { /* Set vertex buffers if needed. */ - if (mgr->dirty_real_vb_mask & used_vb_mask) { + if (mgr->vertex_buffers_dirty) { u_vbuf_set_driver_vertex_buffers(mgr); } - pipe->draw_vbo(pipe, info, drawid_offset, indirect, &draw, 1); + pipe->draw_vbo(pipe, info, drawid_offset, indirect, draws, num_draws); return; } - new_info = *info; - new_draw = draw; + /* Increase refcount to be able to use take_index_buffer_ownership with + * all draws. + */ + if (num_draws > 1 && info->take_index_buffer_ownership) + p_atomic_add(&info->index.resource->reference.count, num_draws - 1); - /* Handle indirect (multi)draws. */ - if (indirect && indirect->buffer) { - unsigned draw_count = 0; + for (unsigned d = 0; d < num_draws; d++) { + struct pipe_draw_info new_info = *info; + struct pipe_draw_start_count_bias new_draw = draws[d]; - /* Get the number of draws. */ - if (indirect->indirect_draw_count) { - pipe_buffer_read(pipe, indirect->indirect_draw_count, - indirect->indirect_draw_count_offset, - 4, &draw_count); - } else { - draw_count = indirect->draw_count; - } + /* Handle indirect (multi)draws. */ + if (indirect && indirect->buffer) { + unsigned draw_count = 0; - if (!draw_count) - goto cleanup; + /* num_draws can only be 1 with indirect draws. */ + assert(num_draws == 1); - unsigned data_size = (draw_count - 1) * indirect->stride + - (new_info.index_size ? 20 : 16); - unsigned *data = malloc(data_size); - if (!data) - goto cleanup; /* report an error? */ - - /* Read the used buffer range only once, because the read can be - * uncached. - */ - pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size, - data); - - if (info->index_size) { - /* Indexed multidraw. */ - unsigned index_bias0 = data[3]; - bool index_bias_same = true; - - /* If we invoke the translate path, we have to split the multidraw. */ - if (incompatible_vb_mask || - mgr->ve->incompatible_elem_mask) { - u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data, - indirect->stride, draw_count); - free(data); - return; + /* Get the number of draws. */ + if (indirect->indirect_draw_count) { + pipe_buffer_read(pipe, indirect->indirect_draw_count, + indirect->indirect_draw_count_offset, + 4, &draw_count); + } else { + draw_count = indirect->draw_count; } - /* See if index_bias is the same for all draws. */ - for (unsigned i = 1; i < draw_count; i++) { - if (data[i * indirect->stride / 4 + 3] != index_bias0) { - index_bias_same = false; - break; - } - } + if (!draw_count) + goto cleanup; - /* Split the multidraw if index_bias is different. */ - if (!index_bias_same) { - u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data, - indirect->stride, draw_count); - free(data); - return; - } + unsigned data_size = (draw_count - 1) * indirect->stride + + (new_info.index_size ? 20 : 16); + unsigned *data = malloc(data_size); + if (!data) + goto cleanup; /* report an error? */ - /* If we don't need to use the translate path and index_bias is - * the same, we can process the multidraw with the time complexity - * equal to 1 draw call (except for the index range computation). - * We only need to compute the index range covering all draw calls - * of the multidraw. - * - * The driver will not look at these values because indirect != NULL. - * These values determine the user buffer bounds to upload. + /* Read the used buffer range only once, because the read can be + * uncached. */ - new_draw.index_bias = index_bias0; - new_info.index_bounds_valid = true; - new_info.min_index = ~0u; - new_info.max_index = 0; - new_info.start_instance = ~0u; - unsigned end_instance = 0; - - struct pipe_transfer *transfer = NULL; - const uint8_t *indices; - - if (info->has_user_indices) { - indices = (uint8_t*)info->index.user; - } else { - indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource, - PIPE_MAP_READ, &transfer); - } + pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size, + data); + + if (info->index_size) { + /* Indexed multidraw. */ + unsigned index_bias0 = data[3]; + bool index_bias_same = true; + + /* If we invoke the translate path, we have to split the multidraw. */ + if (incompatible_vb_mask || + mgr->ve->incompatible_elem_mask) { + u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data, + indirect->stride, draw_count); + free(data); + /* We're done (as num_draws is 1), so return early. */ + return; + } - for (unsigned i = 0; i < draw_count; i++) { - unsigned offset = i * indirect->stride / 4; - unsigned start = data[offset + 2]; - unsigned count = data[offset + 0]; - unsigned start_instance = data[offset + 4]; - unsigned instance_count = data[offset + 1]; - - if (!count || !instance_count) - continue; - - /* Update the ranges of instances. */ - new_info.start_instance = MIN2(new_info.start_instance, - start_instance); - end_instance = MAX2(end_instance, start_instance + instance_count); - - /* Update the index range. */ - unsigned min, max; - u_vbuf_get_minmax_index_mapped(&new_info, count, - indices + - new_info.index_size * start, - &min, &max); - - new_info.min_index = MIN2(new_info.min_index, min); - new_info.max_index = MAX2(new_info.max_index, max); - } - free(data); + /* See if index_bias is the same for all draws. */ + for (unsigned i = 1; i < draw_count; i++) { + if (data[i * indirect->stride / 4 + 3] != index_bias0) { + index_bias_same = false; + break; + } + } + + /* Split the multidraw if index_bias is different. */ + if (!index_bias_same) { + u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data, + indirect->stride, draw_count); + free(data); + /* We're done (as num_draws is 1), so return early. */ + return; + } - if (transfer) - pipe_buffer_unmap(pipe, transfer); + /* If we don't need to use the translate path and index_bias is + * the same, we can process the multidraw with the time complexity + * equal to 1 draw call (except for the index range computation). + * We only need to compute the index range covering all draw calls + * of the multidraw. + * + * The driver will not look at these values because indirect != NULL. + * These values determine the user buffer bounds to upload. + */ + new_draw.index_bias = index_bias0; + new_info.index_bounds_valid = true; + new_info.min_index = ~0u; + new_info.max_index = 0; + new_info.start_instance = ~0u; + unsigned end_instance = 0; + + struct pipe_transfer *transfer = NULL; + const uint8_t *indices; + + if (info->has_user_indices) { + indices = (uint8_t*)info->index.user; + } else { + indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource, + PIPE_MAP_READ, &transfer); + } - /* Set the final instance count. */ - new_info.instance_count = end_instance - new_info.start_instance; + for (unsigned i = 0; i < draw_count; i++) { + unsigned offset = i * indirect->stride / 4; + unsigned start = data[offset + 2]; + unsigned count = data[offset + 0]; + unsigned start_instance = data[offset + 4]; + unsigned instance_count = data[offset + 1]; + + if (!count || !instance_count) + continue; + + /* Update the ranges of instances. */ + new_info.start_instance = MIN2(new_info.start_instance, + start_instance); + end_instance = MAX2(end_instance, start_instance + instance_count); + + /* Update the index range. */ + unsigned min, max; + u_vbuf_get_minmax_index_mapped(&new_info, count, + indices + + new_info.index_size * start, + &min, &max); + + new_info.min_index = MIN2(new_info.min_index, min); + new_info.max_index = MAX2(new_info.max_index, max); + } + free(data); - if (new_info.start_instance == ~0u || !new_info.instance_count) - goto cleanup; - } else { - /* Non-indexed multidraw. - * - * Keep the draw call indirect and compute minimums & maximums, - * which will determine the user buffer bounds to upload, but - * the driver will not look at these values because indirect != NULL. - * - * This efficiently processes the multidraw with the time complexity - * equal to 1 draw call. - */ - new_draw.start = ~0u; - new_info.start_instance = ~0u; - unsigned end_vertex = 0; - unsigned end_instance = 0; - - for (unsigned i = 0; i < draw_count; i++) { - unsigned offset = i * indirect->stride / 4; - unsigned start = data[offset + 2]; - unsigned count = data[offset + 0]; - unsigned start_instance = data[offset + 3]; - unsigned instance_count = data[offset + 1]; - - new_draw.start = MIN2(new_draw.start, start); - new_info.start_instance = MIN2(new_info.start_instance, - start_instance); - - end_vertex = MAX2(end_vertex, start + count); - end_instance = MAX2(end_instance, start_instance + instance_count); - } - free(data); + if (transfer) + pipe_buffer_unmap(pipe, transfer); + + /* Set the final instance count. */ + new_info.instance_count = end_instance - new_info.start_instance; - /* Set the final counts. */ - new_draw.count = end_vertex - new_draw.start; - new_info.instance_count = end_instance - new_info.start_instance; + if (new_info.start_instance == ~0u || !new_info.instance_count) + goto cleanup; + } else { + /* Non-indexed multidraw. + * + * Keep the draw call indirect and compute minimums & maximums, + * which will determine the user buffer bounds to upload, but + * the driver will not look at these values because indirect != NULL. + * + * This efficiently processes the multidraw with the time complexity + * equal to 1 draw call. + */ + new_draw.start = ~0u; + new_info.start_instance = ~0u; + unsigned end_vertex = 0; + unsigned end_instance = 0; + + for (unsigned i = 0; i < draw_count; i++) { + unsigned offset = i * indirect->stride / 4; + unsigned start = data[offset + 2]; + unsigned count = data[offset + 0]; + unsigned start_instance = data[offset + 3]; + unsigned instance_count = data[offset + 1]; + + new_draw.start = MIN2(new_draw.start, start); + new_info.start_instance = MIN2(new_info.start_instance, + start_instance); + + end_vertex = MAX2(end_vertex, start + count); + end_instance = MAX2(end_instance, start_instance + instance_count); + } + free(data); - if (new_draw.start == ~0u || !new_draw.count || !new_info.instance_count) + /* Set the final counts. */ + new_draw.count = end_vertex - new_draw.start; + new_info.instance_count = end_instance - new_info.start_instance; + + if (new_draw.start == ~0u || !new_draw.count || !new_info.instance_count) + goto cleanup; + } + } else { + if ((!indirect && !new_draw.count) || !new_info.instance_count) goto cleanup; } - } else { - if ((!indirect && !new_draw.count) || !new_info.instance_count) - goto cleanup; - } - if (new_info.index_size) { - /* See if anything needs to be done for per-vertex attribs. */ - if (u_vbuf_need_minmax_index(mgr)) { - unsigned max_index; + if (new_info.index_size) { + /* See if anything needs to be done for per-vertex attribs. */ + if (u_vbuf_need_minmax_index(mgr, misaligned)) { + unsigned max_index; + + if (new_info.index_bounds_valid) { + min_index = new_info.min_index; + max_index = new_info.max_index; + } else { + u_vbuf_get_minmax_index(mgr->pipe, &new_info, &new_draw, + &min_index, &max_index); + } - if (new_info.index_bounds_valid) { - min_index = new_info.min_index; - max_index = new_info.max_index; + assert(min_index <= max_index); + + start_vertex = min_index + new_draw.index_bias; + num_vertices = max_index + 1 - min_index; + + /* Primitive restart doesn't work when unrolling indices. + * We would have to break this drawing operation into several ones. */ + /* Use some heuristic to see if unrolling indices improves + * performance. */ + if (!indirect && + !new_info.primitive_restart && + util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) && + !u_vbuf_mapping_vertex_buffer_blocks(mgr, misaligned)) { + unroll_indices = true; + user_vb_mask &= ~(mgr->ve->nonzero_stride_vb_mask & + mgr->ve->noninstance_vb_mask_any); + } } else { - u_vbuf_get_minmax_index(mgr->pipe, &new_info, &new_draw, - &min_index, &max_index); - } - - assert(min_index <= max_index); - - start_vertex = min_index + new_draw.index_bias; - num_vertices = max_index + 1 - min_index; - - /* Primitive restart doesn't work when unrolling indices. - * We would have to break this drawing operation into several ones. */ - /* Use some heuristic to see if unrolling indices improves - * performance. */ - if (!indirect && - !new_info.primitive_restart && - util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) && - !u_vbuf_mapping_vertex_buffer_blocks(mgr)) { - unroll_indices = TRUE; - user_vb_mask &= ~(mgr->nonzero_stride_vb_mask & - mgr->ve->noninstance_vb_mask_any); + /* Nothing to do for per-vertex attribs. */ + start_vertex = 0; + num_vertices = 0; + min_index = 0; } } else { - /* Nothing to do for per-vertex attribs. */ - start_vertex = 0; - num_vertices = 0; + start_vertex = new_draw.start; + num_vertices = new_draw.count; min_index = 0; } - } else { - start_vertex = new_draw.start; - num_vertices = new_draw.count; - min_index = 0; - } - /* Translate vertices with non-native layouts or formats. */ - if (unroll_indices || - incompatible_vb_mask || - mgr->ve->incompatible_elem_mask) { - if (!u_vbuf_translate_begin(mgr, &new_info, &new_draw, - start_vertex, num_vertices, - min_index, unroll_indices)) { - debug_warn_once("u_vbuf_translate_begin() failed"); - goto cleanup; - } + /* Translate vertices with non-native layouts or formats. */ + if (unroll_indices || + incompatible_vb_mask || + mgr->ve->incompatible_elem_mask) { + if (!u_vbuf_translate_begin(mgr, &new_info, &new_draw, + start_vertex, num_vertices, + min_index, unroll_indices, misaligned)) { + debug_warn_once("u_vbuf_translate_begin() failed"); + goto cleanup; + } - if (unroll_indices) { - new_info.index_size = 0; - new_draw.index_bias = 0; - new_info.index_bounds_valid = true; - new_info.min_index = 0; - new_info.max_index = new_draw.count - 1; - new_draw.start = 0; + if (unroll_indices) { + if (!new_info.has_user_indices && info->take_index_buffer_ownership) + pipe_drop_resource_references(new_info.index.resource, 1); + new_info.index_size = 0; + new_draw.index_bias = 0; + new_info.index_bounds_valid = true; + new_info.min_index = 0; + new_info.max_index = new_draw.count - 1; + new_draw.start = 0; + } + + user_vb_mask &= ~(incompatible_vb_mask | + mgr->ve->incompatible_vb_mask_all); + mgr->vertex_buffers_dirty = true; } - user_vb_mask &= ~(incompatible_vb_mask | - mgr->ve->incompatible_vb_mask_all); - } + /* Upload user buffers. */ + if (user_vb_mask) { + if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices, + new_info.start_instance, + new_info.instance_count) != PIPE_OK) { + debug_warn_once("u_vbuf_upload_buffers() failed"); + goto cleanup; + } - /* Upload user buffers. */ - if (user_vb_mask) { - if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices, - new_info.start_instance, - new_info.instance_count) != PIPE_OK) { - debug_warn_once("u_vbuf_upload_buffers() failed"); - goto cleanup; + mgr->vertex_buffers_dirty = true; } - mgr->dirty_real_vb_mask |= user_vb_mask; - } + /* + if (unroll_indices) { + printf("unrolling indices: start_vertex = %i, num_vertices = %i\n", + start_vertex, num_vertices); + util_dump_draw_info(stdout, info); + printf("\n"); + } - /* - if (unroll_indices) { - printf("unrolling indices: start_vertex = %i, num_vertices = %i\n", - start_vertex, num_vertices); - util_dump_draw_info(stdout, info); - printf("\n"); - } + unsigned i; + for (i = 0; i < mgr->nr_vertex_buffers; i++) { + printf("input %i: ", i); + util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i); + printf("\n"); + } + for (i = 0; i < mgr->nr_real_vertex_buffers; i++) { + printf("real %i: ", i); + util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i); + printf("\n"); + } + */ - unsigned i; - for (i = 0; i < mgr->nr_vertex_buffers; i++) { - printf("input %i: ", i); - util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i); - printf("\n"); - } - for (i = 0; i < mgr->nr_real_vertex_buffers; i++) { - printf("real %i: ", i); - util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i); - printf("\n"); + u_upload_unmap(pipe->stream_uploader); + if (mgr->vertex_buffers_dirty) + u_vbuf_set_driver_vertex_buffers(mgr); + + if ((new_info.index_size == 1 && mgr->caps.rewrite_ubyte_ibs) || + (new_info.primitive_restart && + ((new_info.restart_index != fixed_restart_index && mgr->caps.rewrite_restart_index) || + !(mgr->caps.supported_restart_modes & BITFIELD_BIT(new_info.mode)))) || + !(mgr->caps.supported_prim_modes & BITFIELD_BIT(new_info.mode))) { + util_primconvert_save_flatshade_first(mgr->pc, mgr->flatshade_first); + util_primconvert_draw_vbo(mgr->pc, &new_info, drawid_offset, indirect, &new_draw, 1); + } else + pipe->draw_vbo(pipe, &new_info, drawid_offset, indirect, &new_draw, 1); + if (info->increment_draw_id) + drawid_offset++; } - */ - - u_upload_unmap(pipe->stream_uploader); - if (mgr->dirty_real_vb_mask) - u_vbuf_set_driver_vertex_buffers(mgr); - - if ((new_info.index_size == 1 && mgr->caps.rewrite_ubyte_ibs) || - (new_info.primitive_restart && - ((new_info.restart_index != fixed_restart_index && mgr->caps.rewrite_restart_index) || - !(mgr->caps.supported_restart_modes & BITFIELD_BIT(new_info.mode)))) || - !(mgr->caps.supported_prim_modes & BITFIELD_BIT(new_info.mode))) { - util_primconvert_save_flatshade_first(mgr->pc, mgr->flatshade_first); - util_primconvert_draw_vbo(mgr->pc, &new_info, drawid_offset, indirect, &new_draw, 1); - } else - pipe->draw_vbo(pipe, &new_info, drawid_offset, indirect, &new_draw, 1); if (mgr->using_translate) { u_vbuf_translate_end(mgr); |