nv50: add texture, constbuf, image, buffer validation

This makes compute mostly work. For now, images and buffers are laid out at fixed offsets from each other in the globals "array", but this should be done dynamically. Still missing: passing image info to shaders, and adding image formats to the shader key. Heavily inspired by the nvc0 variants of these functions. Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu> Acked-by: Pierre Moreau <dev@pmoreau.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9299>
author: Ilia Mirkin <imirkin@alum.mit.edu> 2021-02-24 21:04:25 -0500
committer: Ilia Mirkin <imirkin@alum.mit.edu> 2021-04-11 12:31:14 -0400
commit: c3e9be9b5a55f2e1462463d680e48a1506196eac (patch)
tree: f94466cae196f127948d1f43158113d9a9e5948a
parent: 1a6a772527974b390a5a691512319f7692ae430f (diff)
2 files changed, 281 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
index 95fa696a086..017ebe7ef07 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -24,6 +24,7 @@
  *
  */
 
+#include "util/format/u_format.h"
 #include "nv50/nv50_context.h"
 #include "nv50/nv50_compute.xml.h"
 
@@ -152,10 +153,284 @@ nv50_screen_compute_setup(struct nv50_screen *screen,
    BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1);
    PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
 
+   BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3);
+   PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
+   PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
+   PUSH_DATA (push, (NV50_CB_PCP << 16) | 0x0000);
+
+   BEGIN_NV04(push, NV50_CP(QUERY_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->fence.bo->offset + 16);
+   PUSH_DATA (push, screen->fence.bo->offset + 16);
+
    return 0;
 }
 
 static void
+nv50_compute_validate_samplers(struct nv50_context *nv50)
+{ /* Validate compute-stage samplers (TSC), then flag the 3D samplers dirty. */
+   bool need_flush = nv50_validate_tsc(nv50, NV50_SHADER_STAGE_COMPUTE);
+   if (need_flush) { /* nv50_validate_tsc() returned true: emit TSC_FLUSH */
+      BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1);
+      PUSH_DATA (nv50->base.pushbuf, 0);
+   }
+
+   /* Flag all 3D samplers as dirty because they are aliased with compute. */
+   nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;
+}
+
+static void
+nv50_compute_validate_textures(struct nv50_context *nv50)
+{ /* Validate compute-stage textures (TIC), then flag the 3D textures dirty. */
+   bool need_flush = nv50_validate_tic(nv50, NV50_SHADER_STAGE_COMPUTE);
+   if (need_flush) { /* nv50_validate_tic() returned true: emit TIC_FLUSH */
+      BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TIC_FLUSH), 1);
+      PUSH_DATA (nv50->base.pushbuf, 0);
+   }
+
+   /* The 3D textures are aliased with compute: reset their bufctx_3d
+    * bindings and flag them dirty. */
+   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
+   nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
+}
+
+static inline void
+nv50_compute_invalidate_constbufs(struct nv50_context *nv50)
+{ /* Flag the constbuf state of every 3D shader stage as dirty. */
+   int s;
+
+   /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
+   for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; s++) {
+      nv50->constbuf_dirty[s] |= nv50->constbuf_valid[s]; /* re-dirty every bit set in constbuf_valid */
+      nv50->state.uniform_buffer_bound[s] = false;
+   }
+   nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
+}
+
+static void
+nv50_compute_validate_constbufs(struct nv50_context *nv50)
+{ /* Emit state for every dirty compute constbuf slot, then dirty the 3D ones. */
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   const int s = NV50_SHADER_STAGE_COMPUTE;
+
+   while (nv50->constbuf_dirty[s]) { /* one pass per set bit, lowest bit first */
+      int i = ffs(nv50->constbuf_dirty[s]) - 1;
+      nv50->constbuf_dirty[s] &= ~(1 << i);
+
+      if (nv50->constbuf[s][i].user) { /* user data: pushed inline via CB_DATA */
+         const unsigned b = NV50_CB_PVP + s;
+         unsigned start = 0;
+         unsigned words = nv50->constbuf[s][0].size / 4;
+         if (i) { /* user data is only handled in slot 0; log and skip */
+            NOUVEAU_ERR("user constbufs only supported in slot 0\n");
+            continue;
+         }
+         if (!nv50->state.uniform_buffer_bound[s]) { /* emit the binding only once */
+            nv50->state.uniform_buffer_bound[s] = true;
+            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
+            PUSH_DATA (push, (b << 12) | (i << 8) | 1);
+         }
+         while (words) { /* at most NV04_PFIFO_MAX_PACKET_LEN words per packet */
+            unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
+
+            PUSH_SPACE(push, nr + 3); /* presumably 2 headers + the CB_ADDR word - confirm */
+            BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
+            PUSH_DATA (push, (start << 8) | b);
+            BEGIN_NI04(push, NV50_CP(CB_DATA(0)), nr);
+            PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);
+
+            start += nr;
+            words -= nr;
+         }
+      } else { /* resource-backed slot, or nothing bound */
+         struct nv04_resource *res =
+            nv04_resource(nv50->constbuf[s][i].u.buf);
+         if (res) {
+            /* TODO: allocate persistent bindings */
+            const unsigned b = s * 16 + i;
+
+            assert(nouveau_resource_mapped_by_gpu(&res->base));
+
+            BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3);
+            PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
+            PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
+            PUSH_DATA (push, (b << 16) |
+                       (nv50->constbuf[s][i].size & 0xffff));
+            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
+            PUSH_DATA (push, (b << 12) | (i << 8) | 1);
+
+            BCTX_REFN(nv50->bufctx_cp, CP_CB(i), res, RD);
+
+            nv50->cb_dirty = 1; /* Force cache flush for UBO. */
+            res->cb_bindings[s] |= 1 << i; /* remember that res is bound to slot i */
+         } else { /* nothing bound: same method with the low bit clear */
+            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
+            PUSH_DATA (push, (i << 8) | 0);
+         }
+         if (i == 0) /* clear the flag set by the user-data path above */
+            nv50->state.uniform_buffer_bound[s] = false;
+      }
+   }
+
+   // TODO: Check if having orthogonal slots means the two don't trample over
+   // each other.
+   nv50_compute_invalidate_constbufs(nv50);
+}
+
+static void
+nv50_compute_validate_buffers(struct nv50_context *nv50)
+{ /* Program GLOBAL(0..6) from nv50->buffers[0..6]. */
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   int i;
+
+   for (i = 0; i < 7; i++) { /* each slot takes five data words */
+      BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5);
+      if (nv50->buffers[i].buffer) {
+         struct nv04_resource *res =
+            nv04_resource(nv50->buffers[i].buffer);
+         PUSH_DATAh(push, res->address + nv50->buffers[i].buffer_offset);
+         PUSH_DATA (push, res->address + nv50->buffers[i].buffer_offset);
+         PUSH_DATA (push, 0); /* pitch? */
+         PUSH_DATA (push, ALIGN(nv50->buffers[i].buffer_size, 256) - 1); /* size rounded up to 256, minus 1; presumably an inclusive limit */
+         PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+         BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
+         util_range_add(&res->base, &res->valid_buffer_range, /* record offset .. offset + size */
+                        nv50->buffers[i].buffer_offset,
+                        nv50->buffers[i].buffer_offset +
+                        nv50->buffers[i].buffer_size);
+      } else { /* unbound slot: all five words are zero */
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+      }
+   }
+}
+
+static void
+nv50_get_surface_dims(const struct pipe_image_view *view,
+                      int *width, int *height, int *depth)
+{ /* Store the dimensions of an image view in *width, *height and *depth. */
+   struct nv04_resource *res = nv04_resource(view->resource);
+   int level;
+
+   *width = *height = *depth = 1;
+   if (res->base.target == PIPE_BUFFER) { /* buffers: width = size / format block size */
+      *width = view->u.buf.size / util_format_get_blocksize(view->format);
+      return;
+   }
+
+   level = view->u.tex.level; /* textures: minify the base size to the view's level */
+   *width = u_minify(view->resource->width0, level);
+   *height = u_minify(view->resource->height0, level);
+   *depth = u_minify(view->resource->depth0, level);
+
+   switch (res->base.target) {
+   case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1; /* number of layers in the view */
+      break;
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_3D:
+      break; /* keep the minified depth0 */
+   default:
+      assert(!"unexpected texture target");
+      break;
+   }
+}
+
+static void
+nv50_mark_image_range_valid(const struct pipe_image_view *view)
+{ /* Add a buffer image view's byte range to its resource's valid_buffer_range. */
+   struct nv04_resource *res = (struct nv04_resource *)view->resource;
+
+   assert(view->resource->target == PIPE_BUFFER); /* reads view->u.buf below */
+
+   util_range_add(&res->base, &res->valid_buffer_range,
+                  view->u.buf.offset,
+                  view->u.buf.offset + view->u.buf.size);
+}
+
+static void
+nv50_compute_validate_surfaces(struct nv50_context *nv50)
+{ /* Program GLOBAL(7..14) from nv50->images[0..7]. */
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   int i;
+
+   for (i = 0; i < 8; i++) { /* image i goes to GLOBAL(7 + i), five data words each */
+      struct pipe_image_view *view = &nv50->images[i];
+      int width, height, depth;
+      uint64_t address = 0;
+
+      BEGIN_NV04(push, NV50_CP(GLOBAL(7 + i)), 5);
+      if (view->resource) {
+         struct nv04_resource *res = nv04_resource(view->resource);
+
+         /* get surface dimensions based on the target. */
+         nv50_get_surface_dims(view, &width, &height, &depth);
+
+         address = res->address;
+         if (res->base.target == PIPE_BUFFER) { /* buffer image: linear, no pitch */
+            address += view->u.buf.offset;
+            assert(!(address & 0xff)); /* low 8 address bits must be zero */
+
+            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
+               nv50_mark_image_range_valid(view);
+
+            PUSH_DATAh(push, address);
+            PUSH_DATA (push, address);
+            PUSH_DATA (push, 0); /* pitch? */
+            PUSH_DATA (push, ALIGN(view->u.buf.size, 0x100) - 1);
+            PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+         } else { /* texture image: address the view's level and first layer */
+            struct nv50_miptree *mt = nv50_miptree(view->resource);
+            struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
+            const unsigned z = view->u.tex.first_layer;
+
+            if (mt->layout_3d) {
+               address += nv50_mt_zslice_offset(mt, view->u.tex.level, z);
+               if (depth >= 1) { /* NOTE(review): looks always true (depth presumably >= 1) - confirm vs. depth > 1 */
+                  pipe_debug_message(&nv50->base.debug, CONFORMANCE,
+                                     "3D images are not supported!");
+                  debug_printf("3D images are not supported!\n");
+               }
+            } else {
+               address += mt->layer_stride * z;
+            }
+            address += lvl->offset;
+
+            PUSH_DATAh(push, address);
+            PUSH_DATA (push, address);
+            if (nouveau_bo_memtype(res->bo)) { /* non-zero memtype: presumably a tiled layout */
+               unsigned h = height << mt->ms_y;
+               unsigned nby = util_format_get_nblocksy(view->format, h);
+               unsigned tsy = NV50_TILE_SIZE_Y(lvl->tile_mode) * depth;
+
+               PUSH_DATA (push, lvl->pitch * tsy);
+               PUSH_DATA (push, (align(nby, tsy) - 1) << 16 | (lvl->pitch - 1));
+               PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4); /* mask out z-tiling */
+            } else { /* zero memtype: pitch-linear */
+               PUSH_DATA (push, lvl->pitch);
+               PUSH_DATA (push, align(lvl->pitch * height, 0x100) - 1);
+               PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+            }
+         }
+
+         BCTX_REFN(nv50->bufctx_cp, CP_SUF, res, RDWR);
+      } else { /* unbound image: all five words are zero */
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+      }
+   }
+}
+
+static void
 nv50_compute_validate_globals(struct nv50_context *nv50)
 {
    unsigned i;
@@ -173,6 +448,11 @@ nv50_compute_validate_globals(struct nv50_context *nv50)
 static struct nv50_state_validate
 validate_list_cp[] = {
    { nv50_compprog_validate,              NV50_NEW_CP_PROGRAM     },
+   { nv50_compute_validate_constbufs,     NV50_NEW_CP_CONSTBUF    }, /* callback + NV50_NEW_CP_* bit; presumably run when that bit is dirty - confirm */
+   { nv50_compute_validate_buffers,       NV50_NEW_CP_BUFFERS     },
+   { nv50_compute_validate_surfaces,      NV50_NEW_CP_SURFACES    },
+   { nv50_compute_validate_textures,      NV50_NEW_CP_TEXTURES    },
+   { nv50_compute_validate_samplers,      NV50_NEW_CP_SAMPLERS    },
    { nv50_compute_validate_globals,       NV50_NEW_CP_GLOBALS     },
 };
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 3616895ed4c..7643371abae 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -391,6 +391,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
    BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->stack_bo);
    if (screen->compute) {
       BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
+      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->uniforms);
       BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
       BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo);
    }
author	Ilia Mirkin <imirkin@alum.mit.edu>	2021-02-24 21:04:25 -0500
committer	Ilia Mirkin <imirkin@alum.mit.edu>	2021-04-11 12:31:14 -0400
commit	c3e9be9b5a55f2e1462463d680e48a1506196eac (patch)
tree	f94466cae196f127948d1f43158113d9a9e5948a
parent	1a6a772527974b390a5a691512319f7692ae430f (diff)