#include "pipe/p_context.h" #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" #include "translate/translate.h" #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_resource.h" #include "nvc0/nvc0_3d.xml.h" struct push_context { struct nouveau_pushbuf *push; struct translate *translate; void *dest; const void *idxbuf; uint32_t vertex_size; uint32_t restart_index; uint32_t start_instance; uint32_t instance_id; bool prim_restart; bool need_vertex_id; struct { bool enabled; bool value; uint8_t width; unsigned stride; const uint8_t *data; } edgeflag; }; static void nvc0_push_upload_vertex_ids(struct push_context *, struct nvc0_context *, const struct pipe_draw_info *); static void nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx) { ctx->push = nvc0->base.pushbuf; ctx->translate = nvc0->vertex->translate; ctx->vertex_size = nvc0->vertex->size; ctx->instance_id = 0; ctx->need_vertex_id = nvc0->vertprog->vp.need_vertex_id && (nvc0->vertex->num_elements < 32); ctx->edgeflag.value = true; ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS; /* silence warnings */ ctx->edgeflag.data = NULL; ctx->edgeflag.stride = 0; ctx->edgeflag.width = 0; } static inline void nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias) { struct translate *translate = nvc0->vertex->translate; unsigned i; for (i = 0; i < nvc0->num_vtxbufs; ++i) { const uint8_t *map; const struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; if (likely(vb->is_user_buffer)) map = (const uint8_t *)vb->buffer.user; else map = nouveau_resource_map_offset(&nvc0->base, nv04_resource(vb->buffer.resource), vb->buffer_offset, NOUVEAU_BO_RD); if (index_bias && !unlikely(nvc0->vertex->instance_bufs & (1 << i))) map += (intptr_t)index_bias * vb->stride; translate->set_buffer(translate, i, map, vb->stride, ~0); } } static inline void nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0, const struct pipe_draw_info *info, unsigned offset) { if (!info->has_user_indices) { struct nv04_resource *buf = nv04_resource(info->index.resource); ctx->idxbuf = nouveau_resource_map_offset(&nvc0->base, buf, offset, NOUVEAU_BO_RD); } else { ctx->idxbuf = info->index.user; } } static inline void nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0, int32_t index_bias) { unsigned attr = nvc0->vertprog->vp.edgeflag; struct pipe_vertex_element *ve = &nvc0->vertex->element[attr].pipe; struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; struct nv04_resource *buf = nv04_resource(vb->buffer.resource); ctx->edgeflag.stride = vb->stride; ctx->edgeflag.width = util_format_get_blocksize(ve->src_format); if (!vb->is_user_buffer) { unsigned offset = vb->buffer_offset + ve->src_offset; ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base, buf, offset, NOUVEAU_BO_RD); } else { ctx->edgeflag.data = (const uint8_t *)vb->buffer.user + ve->src_offset; } if (index_bias) ctx->edgeflag.data += (intptr_t)index_bias * vb->stride; } static inline unsigned prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index) { unsigned i; for (i = 0; i < push && elts[i] != index; ++i); return i; } static inline unsigned prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index) { unsigned i; for (i = 0; i < push && elts[i] != index; ++i); return i; } static inline unsigned prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index) { unsigned i; for (i = 0; i < push && elts[i] != index; ++i); 
static inline bool
ef_value_8(const struct push_context *ctx, uint32_t index)
{
   uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
   return !!*pf;
}

static inline bool
ef_value_32(const struct push_context *ctx, uint32_t index)
{
   uint32_t *pf = (uint32_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
   return !!*pf;
}

static inline bool
ef_toggle(struct push_context *ctx)
{
   ctx->edgeflag.value = !ctx->edgeflag.value;
   return ctx->edgeflag.value;
}

static inline unsigned
ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
{
   unsigned i;
   bool ef = ctx->edgeflag.value;
   if (ctx->edgeflag.width == 1)
      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
   else
      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
   return i;
}

static inline unsigned
ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
{
   unsigned i;
   bool ef = ctx->edgeflag.value;
   if (ctx->edgeflag.width == 1)
      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
   else
      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
   return i;
}

static inline unsigned
ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
{
   unsigned i;
   bool ef = ctx->edgeflag.value;
   if (ctx->edgeflag.width == 1)
      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
   else
      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
   return i;
}

static inline unsigned
ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
{
   unsigned i;
   bool ef = ctx->edgeflag.value;
   if (ctx->edgeflag.width == 1)
      for (i = 0; i < n && ef_value_8(ctx, start++) == ef; ++i);
   else
      for (i = 0; i < n && ef_value_32(ctx, start++) == ef; ++i);
   return i;
}

static inline void *
nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nouveau_bo *bo;
   uint64_t va;
   const unsigned size = count * nvc0->vertex->size;

   void *const dest = nouveau_scratch_get(&nvc0->base, size, &va, &bo);

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
   PUSH_DATAh(push, va);
   PUSH_DATA (push, va);
   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
   PUSH_DATAh(push, va + size - 1);
   PUSH_DATA (push, va + size - 1);

   BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
                bo);
   nouveau_pushbuf_validate(push);

   return dest;
}
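/* The three disp_vertices_iXX variants below are identical except for the
 * index type. translate writes the referenced vertices into the scratch
 * vertex array set up above, and the loop then splits the draw on
 * primitive-restart indices (emitted explicitly as VB_ELEMENT_U32 =
 * 0xffffffff, matching the restart index programmed in nvc0_push_vbo) and
 * on edge-flag transitions.
 */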
static void
disp_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   const uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start;
   unsigned pos = 0;

   do {
      unsigned nR = count;

      if (unlikely(ctx->prim_restart))
         nR = prim_restart_search_i08(elts, nR, ctx->restart_index);

      translate->run_elts8(translate, elts, nR,
                           ctx->start_instance, ctx->instance_id, ctx->dest);
      count -= nR;
      ctx->dest += nR * ctx->vertex_size;

      while (nR) {
         unsigned nE = nR;

         if (unlikely(ctx->edgeflag.enabled))
            nE = ef_toggle_search_i08(ctx, elts, nR);

         PUSH_SPACE(push, 4);
         if (likely(nE >= 2)) {
            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
            PUSH_DATA (push, pos);
            PUSH_DATA (push, nE);
         } else if (nE) {
            if (pos <= 0xff) {
               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
            } else {
               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
               PUSH_DATA (push, pos);
            }
         }
         if (unlikely(nE != nR))
            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

         pos += nE;
         elts += nE;
         nR -= nE;
      }
      if (count) {
         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (push, 0xffffffff);
         ++elts;
         ctx->dest += ctx->vertex_size;
         ++pos;
         --count;
      }
   } while (count);
}

static void
disp_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   const uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start;
   unsigned pos = 0;

   do {
      unsigned nR = count;

      if (unlikely(ctx->prim_restart))
         nR = prim_restart_search_i16(elts, nR, ctx->restart_index);

      translate->run_elts16(translate, elts, nR,
                            ctx->start_instance, ctx->instance_id, ctx->dest);
      count -= nR;
      ctx->dest += nR * ctx->vertex_size;

      while (nR) {
         unsigned nE = nR;

         if (unlikely(ctx->edgeflag.enabled))
            nE = ef_toggle_search_i16(ctx, elts, nR);

         PUSH_SPACE(push, 4);
         if (likely(nE >= 2)) {
            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
            PUSH_DATA (push, pos);
            PUSH_DATA (push, nE);
         } else if (nE) {
            if (pos <= 0xff) {
               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
            } else {
               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
               PUSH_DATA (push, pos);
            }
         }
         if (unlikely(nE != nR))
            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

         pos += nE;
         elts += nE;
         nR -= nE;
      }
      if (count) {
         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (push, 0xffffffff);
         ++elts;
         ctx->dest += ctx->vertex_size;
         ++pos;
         --count;
      }
   } while (count);
}

static void
disp_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   const uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start;
   unsigned pos = 0;

   do {
      unsigned nR = count;

      if (unlikely(ctx->prim_restart))
         nR = prim_restart_search_i32(elts, nR, ctx->restart_index);

      translate->run_elts(translate, elts, nR,
                          ctx->start_instance, ctx->instance_id, ctx->dest);
      count -= nR;
      ctx->dest += nR * ctx->vertex_size;

      while (nR) {
         unsigned nE = nR;

         if (unlikely(ctx->edgeflag.enabled))
            nE = ef_toggle_search_i32(ctx, elts, nR);

         PUSH_SPACE(push, 4);
         if (likely(nE >= 2)) {
            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
            PUSH_DATA (push, pos);
            PUSH_DATA (push, nE);
         } else if (nE) {
            if (pos <= 0xff) {
               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
            } else {
               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
               PUSH_DATA (push, pos);
            }
         }
         if (unlikely(nE != nR))
            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

         pos += nE;
         elts += nE;
         nR -= nE;
      }
      if (count) {
         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (push, 0xffffffff);
         ++elts;
         ctx->dest += ctx->vertex_size;
         ++pos;
         --count;
      }
   } while (count);
}
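/* In the indexed loops above, hitting the restart index costs one
 * VB_ELEMENT_U32 packet carrying 0xffffffff, which the hardware recognizes
 * as the restart index programmed in nvc0_push_vbo. The source element is
 * skipped, but ctx->dest and pos still advance by one vertex, since the
 * scratch array was sized for the full count including restart slots.
 */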
static void
disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   unsigned pos = 0;

   /* XXX: This will read the data corresponding to the primitive restart
    * index, maybe we should avoid that?
    */
   translate->run(translate, start, count,
                  ctx->start_instance, ctx->instance_id, ctx->dest);
   do {
      unsigned nr = count;

      if (unlikely(ctx->edgeflag.enabled))
         nr = ef_toggle_search_seq(ctx, start + pos, nr);

      PUSH_SPACE(push, 4);
      if (likely(nr)) {
         BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
         PUSH_DATA (push, pos);
         PUSH_DATA (push, nr);
      }
      if (unlikely(nr != count))
         IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

      pos += nr;
      count -= nr;
   } while (count);
}

#define NVC0_PRIM_GL_CASE(n) \
   case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n

static inline unsigned
nvc0_prim_gl(unsigned prim)
{
   switch (prim) {
   NVC0_PRIM_GL_CASE(POINTS);
   NVC0_PRIM_GL_CASE(LINES);
   NVC0_PRIM_GL_CASE(LINE_LOOP);
   NVC0_PRIM_GL_CASE(LINE_STRIP);
   NVC0_PRIM_GL_CASE(TRIANGLES);
   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
   NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
   NVC0_PRIM_GL_CASE(QUADS);
   NVC0_PRIM_GL_CASE(QUAD_STRIP);
   NVC0_PRIM_GL_CASE(POLYGON);
   NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
   NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
   NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
   NVC0_PRIM_GL_CASE(PATCHES);
   default:
      return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
   }
}

void
nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
{
   struct push_context ctx;
   unsigned i, index_size;
   unsigned inst_count = info->instance_count;
   unsigned vert_count = info->count;
   unsigned prim;

   nvc0_push_context_init(nvc0, &ctx);

   nvc0_vertex_configure_translate(nvc0, info->index_bias);

   if (nvc0->state.index_bias) {
      /* this is already taken care of by translate */
      IMMED_NVC0(ctx.push, NVC0_3D(VB_ELEMENT_BASE), 0);
      nvc0->state.index_bias = 0;
   }

   if (unlikely(ctx.edgeflag.enabled))
      nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias);

   ctx.prim_restart = info->primitive_restart;
   ctx.restart_index = info->restart_index;

   if (info->primitive_restart) {
      /* NOTE: I hope we won't ever need that last index (~0).
       * If we do, we have to disable primitive restart here always and
       * use END,BEGIN to restart. (XXX: would that affect PrimitiveID?)
       * We could also deactivate PRIM_RESTART_WITH_DRAW_ARRAYS temporarily,
       * and add manual restart to disp_vertices_seq.
       */
      BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
      PUSH_DATA (ctx.push, 1);
      PUSH_DATA (ctx.push, info->index_size ? 0xffffffff : info->restart_index);
   } else
   if (nvc0->state.prim_restart) {
      IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
   }
   nvc0->state.prim_restart = info->primitive_restart;
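   /* For indexed draws the application's indices never reach the hardware:
    * the disp_vertices_i* loops emit explicit restarts as
    * VB_ELEMENT_U32 = 0xffffffff, so the restart index programmed above must
    * be 0xffffffff rather than info->restart_index. Only the non-indexed
    * path relies on the API restart index, via the restart-with-draw-arrays
    * behaviour mentioned in the note above.
    */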
   if (info->index_size) {
      nvc0_push_map_idxbuf(&ctx, nvc0, info, info->start * info->index_size);
      index_size = info->index_size;
   } else {
      if (unlikely(info->count_from_stream_output)) {
         struct pipe_context *pipe = &nvc0->base.pipe;
         struct nvc0_so_target *targ;
         targ = nvc0_so_target(info->count_from_stream_output);
         pipe->get_query_result(pipe, targ->pq, true, (void *)&vert_count);
         vert_count /= targ->stride;
      }
      ctx.idxbuf = NULL; /* shut up warnings */
      index_size = 0;
   }

   ctx.start_instance = info->start_instance;

   prim = nvc0_prim_gl(info->mode);
   do {
      PUSH_SPACE(ctx.push, 9);

      ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count);
      if (unlikely(!ctx.dest))
         break;

      if (unlikely(ctx.need_vertex_id))
         nvc0_push_upload_vertex_ids(&ctx, nvc0, info);

      if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
         IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1);
      PUSH_DATA (ctx.push, prim);
      switch (index_size) {
      case 1:
         disp_vertices_i08(&ctx, info->start, vert_count);
         break;
      case 2:
         disp_vertices_i16(&ctx, info->start, vert_count);
         break;
      case 4:
         disp_vertices_i32(&ctx, info->start, vert_count);
         break;
      default:
         assert(index_size == 0);
         disp_vertices_seq(&ctx, info->start, vert_count);
         break;
      }
      PUSH_SPACE(ctx.push, 1);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0);

      if (--inst_count) {
         prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
         ++ctx.instance_id;
      }
      nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
      nouveau_scratch_done(&nvc0->base);
   } while (inst_count);


   /* reset state and unmap buffers (no-op) */

   if (unlikely(!ctx.edgeflag.value)) {
      PUSH_SPACE(ctx.push, 1);
      IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1);
   }

   if (unlikely(ctx.need_vertex_id)) {
      PUSH_SPACE(ctx.push, 4);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0);
      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1);
      PUSH_DATA (ctx.push,
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST |
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0);
   }

   if (info->index_size && !info->has_user_indices)
      nouveau_resource_unmap(nv04_resource(info->index.resource));
   for (i = 0; i < nvc0->num_vtxbufs; ++i)
      nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer.resource));

   NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1);
}

static inline void
copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
{
   unsigned i;
   for (i = 0; i < n; ++i)
      dst[i] = elts[i] + bias;
}

static inline void
copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
{
   unsigned i;
   for (i = 0; i < n; ++i)
      dst[i] = elts[i] + bias;
}

static inline void
copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n)
{
   unsigned i;
   for (i = 0; i < n; ++i)
      dst[i] = elts[i] + bias;
}
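/* If the vertex shader needs gl_VertexID, the re-packed scratch array no
 * longer matches the original vertex numbering, so the original indices
 * (or a start-based sequence) are uploaded as an extra vertex attribute,
 * and VERTEX_ID_REPLACE redirects the shader's vertex-ID input to it.
 * Sketch of the resulting layout (illustrative; a = nvc0->vertex->num_elements):
 *
 *    attribs 0..a-1 : translated vertex data fetched from VERTEX_ARRAY(0)
 *    attrib  a      : uint index stream in VERTEX_ARRAY(1) -> vertex ID
 */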
static void
nvc0_push_upload_vertex_ids(struct push_context *ctx,
                            struct nvc0_context *nvc0,
                            const struct pipe_draw_info *info)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct nouveau_bo *bo;
   uint64_t va;
   uint32_t *data;
   uint32_t format;
   unsigned index_size = info->index_size;
   unsigned i;
   unsigned a = nvc0->vertex->num_elements;

   if (!index_size || info->index_bias)
      index_size = 4;
   data = (uint32_t *)nouveau_scratch_get(&nvc0->base,
                                          info->count * index_size, &va, &bo);

   BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
                bo);
   nouveau_pushbuf_validate(push);

   if (info->index_size) {
      if (!info->index_bias) {
         memcpy(data, ctx->idxbuf, info->count * index_size);
      } else {
         switch (info->index_size) {
         case 1:
            copy_indices_u8(data, ctx->idxbuf, info->index_bias, info->count);
            break;
         case 2:
            copy_indices_u16(data, ctx->idxbuf, info->index_bias, info->count);
            break;
         default:
            copy_indices_u32(data, ctx->idxbuf, info->index_bias, info->count);
            break;
         }
      }
   } else {
      for (i = 0; i < info->count; ++i)
         data[i] = i + (info->start + info->index_bias);
   }

   format = (1 << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) |
      NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT;

   switch (index_size) {
   case 1:
      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8;
      break;
   case 2:
      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16;
      break;
   default:
      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32;
      break;
   }

   PUSH_SPACE(push, 12);

   if (unlikely(nvc0->state.instance_elts & 2)) {
      nvc0->state.instance_elts &= ~2;
      IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(1)), 0);
   }

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1);
   PUSH_DATA (push, format);

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 3);
   PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
   PUSH_DATAh(push, va);
   PUSH_DATA (push, va);
   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
   PUSH_DATAh(push, va + info->count * index_size - 1);
   PUSH_DATA (push, va + info->count * index_size - 1);

#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) \
   (((0x80 + (a) * 0x10) / 4) << NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT)

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_REPLACE), 1);
   PUSH_DATA (push, NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) | 1);
}
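/* Note on VERTEX_ID_REPLACE_SOURCE_ATTR_X above: generic attribute a sits at
 * input address 0x80 + a * 0x10 in the shader's attribute space, and the
 * SOURCE field takes that address in 32-bit words, hence the division by 4.
 * The OR'd 1 is the enable bit of VERTEX_ID_REPLACE, which the cleanup path
 * in nvc0_push_vbo clears again with an immediate 0.
 */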