diff options
author | Ilia Mirkin <imirkin@alum.mit.edu> | 2021-02-24 21:04:25 -0500 |
---|---|---|
committer | Ilia Mirkin <imirkin@alum.mit.edu> | 2021-04-11 12:31:14 -0400 |
commit | c3e9be9b5a55f2e1462463d680e48a1506196eac (patch) | |
tree | f94466cae196f127948d1f43158113d9a9e5948a | |
parent | 1a6a772527974b390a5a691512319f7692ae430f (diff) |
nv50: add texture, constbuf, image, buffer validation
This makes compute mostly work. For now we're laying out images/buffers
at a fixed offset from each other in the globals "array", but this
should be done dynamically. We're also not yet passing image info to
shaders, nor adding image formats to a shader key.
Heavily inspired by nvc0 variants of these.
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Acked-by: Pierre Moreau <dev@pmoreau.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9299>
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_compute.c | 280 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_context.c | 1 |
2 files changed, 281 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c index 95fa696a086..017ebe7ef07 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -24,6 +24,7 @@ * */ +#include "util/format/u_format.h" #include "nv50/nv50_context.h" #include "nv50/nv50_compute.xml.h" @@ -152,10 +153,284 @@ nv50_screen_compute_setup(struct nv50_screen *screen, BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1); PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); + BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->uniforms->offset + (3 << 16)); + PUSH_DATA (push, screen->uniforms->offset + (3 << 16)); + PUSH_DATA (push, (NV50_CB_PCP << 16) | 0x0000); + + BEGIN_NV04(push, NV50_CP(QUERY_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->fence.bo->offset + 16); + PUSH_DATA (push, screen->fence.bo->offset + 16); + return 0; } static void +nv50_compute_validate_samplers(struct nv50_context *nv50) +{ + bool need_flush = nv50_validate_tsc(nv50, NV50_SHADER_STAGE_COMPUTE); + if (need_flush) { + BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + } + + /* Invalidate all 3D samplers because they are aliased. */ + nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS; +} + +static void +nv50_compute_validate_textures(struct nv50_context *nv50) +{ + bool need_flush = nv50_validate_tic(nv50, NV50_SHADER_STAGE_COMPUTE); + if (need_flush) { + BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TIC_FLUSH), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + } + + /* Invalidate all 3D textures because they are aliased. */ + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); + nv50->dirty_3d |= NV50_NEW_3D_TEXTURES; +} + +static inline void +nv50_compute_invalidate_constbufs(struct nv50_context *nv50) +{ + int s; + + /* Invalidate all 3D constbufs because they are aliased with COMPUTE. 
*/ + for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; s++) { + nv50->constbuf_dirty[s] |= nv50->constbuf_valid[s]; + nv50->state.uniform_buffer_bound[s] = false; + } + nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF; +} + +static void +nv50_compute_validate_constbufs(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + const int s = NV50_SHADER_STAGE_COMPUTE; + + while (nv50->constbuf_dirty[s]) { + int i = ffs(nv50->constbuf_dirty[s]) - 1; + nv50->constbuf_dirty[s] &= ~(1 << i); + + if (nv50->constbuf[s][i].user) { + const unsigned b = NV50_CB_PVP + s; + unsigned start = 0; + unsigned words = nv50->constbuf[s][0].size / 4; + if (i) { + NOUVEAU_ERR("user constbufs only supported in slot 0\n"); + continue; + } + if (!nv50->state.uniform_buffer_bound[s]) { + nv50->state.uniform_buffer_bound[s] = true; + BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1); + PUSH_DATA (push, (b << 12) | (i << 8) | 1); + } + while (words) { + unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); + + PUSH_SPACE(push, nr + 3); + BEGIN_NV04(push, NV50_CP(CB_ADDR), 1); + PUSH_DATA (push, (start << 8) | b); + BEGIN_NI04(push, NV50_CP(CB_DATA(0)), nr); + PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr); + + start += nr; + words -= nr; + } + } else { + struct nv04_resource *res = + nv04_resource(nv50->constbuf[s][i].u.buf); + if (res) { + /* TODO: allocate persistent bindings */ + const unsigned b = s * 16 + i; + + assert(nouveau_resource_mapped_by_gpu(&res->base)); + + BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset); + PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset); + PUSH_DATA (push, (b << 16) | + (nv50->constbuf[s][i].size & 0xffff)); + BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1); + PUSH_DATA (push, (b << 12) | (i << 8) | 1); + + BCTX_REFN(nv50->bufctx_cp, CP_CB(i), res, RD); + + nv50->cb_dirty = 1; /* Force cache flush for UBO. 
*/ + res->cb_bindings[s] |= 1 << i; + } else { + BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1); + PUSH_DATA (push, (i << 8) | 0); + } + if (i == 0) + nv50->state.uniform_buffer_bound[s] = false; + } + } + + // TODO: Check if having orthogonal slots means the two don't trample over + // each other. + nv50_compute_invalidate_constbufs(nv50); +} + +static void +nv50_compute_validate_buffers(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + int i; + + for (i = 0; i < 7; i++) { + BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5); + if (nv50->buffers[i].buffer) { + struct nv04_resource *res = + nv04_resource(nv50->buffers[i].buffer); + PUSH_DATAh(push, res->address + nv50->buffers[i].buffer_offset); + PUSH_DATA (push, res->address + nv50->buffers[i].buffer_offset); + PUSH_DATA (push, 0); /* pitch? */ + PUSH_DATA (push, ALIGN(nv50->buffers[i].buffer_size, 256) - 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR); + util_range_add(&res->base, &res->valid_buffer_range, + nv50->buffers[i].buffer_offset, + nv50->buffers[i].buffer_offset + + nv50->buffers[i].buffer_size); + } else { + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + } + } +} + +static void +nv50_get_surface_dims(const struct pipe_image_view *view, + int *width, int *height, int *depth) +{ + struct nv04_resource *res = nv04_resource(view->resource); + int level; + + *width = *height = *depth = 1; + if (res->base.target == PIPE_BUFFER) { + *width = view->u.buf.size / util_format_get_blocksize(view->format); + return; + } + + level = view->u.tex.level; + *width = u_minify(view->resource->width0, level); + *height = u_minify(view->resource->height0, level); + *depth = u_minify(view->resource->depth0, level); + + switch (res->base.target) { + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + *depth = 
view->u.tex.last_layer - view->u.tex.first_layer + 1; + break; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_3D: + break; + default: + assert(!"unexpected texture target"); + break; + } +} + +static void +nv50_mark_image_range_valid(const struct pipe_image_view *view) +{ + struct nv04_resource *res = (struct nv04_resource *)view->resource; + + assert(view->resource->target == PIPE_BUFFER); + + util_range_add(&res->base, &res->valid_buffer_range, + view->u.buf.offset, + view->u.buf.offset + view->u.buf.size); +} + +static void +nv50_compute_validate_surfaces(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + int i; + + for (i = 0; i < 8; i++) { + struct pipe_image_view *view = &nv50->images[i]; + int width, height, depth; + uint64_t address = 0; + + BEGIN_NV04(push, NV50_CP(GLOBAL(7 + i)), 5); + if (view->resource) { + struct nv04_resource *res = nv04_resource(view->resource); + + /* get surface dimensions based on the target. */ + nv50_get_surface_dims(view, &width, &height, &depth); + + address = res->address; + if (res->base.target == PIPE_BUFFER) { + address += view->u.buf.offset; + assert(!(address & 0xff)); + + if (view->access & PIPE_IMAGE_ACCESS_WRITE) + nv50_mark_image_range_valid(view); + + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + PUSH_DATA (push, 0); /* pitch? 
*/ + PUSH_DATA (push, ALIGN(view->u.buf.size, 0x100) - 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + } else { + struct nv50_miptree *mt = nv50_miptree(view->resource); + struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level]; + const unsigned z = view->u.tex.first_layer; + + if (mt->layout_3d) { + address += nv50_mt_zslice_offset(mt, view->u.tex.level, z); + if (depth >= 1) { + pipe_debug_message(&nv50->base.debug, CONFORMANCE, + "3D images are not supported!"); + debug_printf("3D images are not supported!\n"); + } + } else { + address += mt->layer_stride * z; + } + address += lvl->offset; + + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + if (nouveau_bo_memtype(res->bo)) { + unsigned h = height << mt->ms_y; + unsigned nby = util_format_get_nblocksy(view->format, h); + unsigned tsy = NV50_TILE_SIZE_Y(lvl->tile_mode) * depth; + + PUSH_DATA (push, lvl->pitch * tsy); + PUSH_DATA (push, (align(nby, tsy) - 1) << 16 | (lvl->pitch - 1)); + PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4); /* mask out z-tiling */ + } else { + PUSH_DATA (push, lvl->pitch); + PUSH_DATA (push, align(lvl->pitch * height, 0x100) - 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + } + } + + BCTX_REFN(nv50->bufctx_cp, CP_SUF, res, RDWR); + } else { + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + } + } +} + +static void nv50_compute_validate_globals(struct nv50_context *nv50) { unsigned i; @@ -173,6 +448,11 @@ nv50_compute_validate_globals(struct nv50_context *nv50) static struct nv50_state_validate validate_list_cp[] = { { nv50_compprog_validate, NV50_NEW_CP_PROGRAM }, + { nv50_compute_validate_constbufs, NV50_NEW_CP_CONSTBUF }, + { nv50_compute_validate_buffers, NV50_NEW_CP_BUFFERS }, + { nv50_compute_validate_surfaces, NV50_NEW_CP_SURFACES }, + { nv50_compute_validate_textures, NV50_NEW_CP_TEXTURES }, + { nv50_compute_validate_samplers, NV50_NEW_CP_SAMPLERS }, { 
nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS }, }; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index 3616895ed4c..7643371abae 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -391,6 +391,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->stack_bo); if (screen->compute) { BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code); + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->uniforms); BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc); BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo); } |