summary refs log tree commit diff
path: root/src/gallium/drivers/nouveau/nvc0
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nouveau/nvc0')
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_context.h 15
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_program.c 3
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 33
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_state.c 17
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 152
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 12
6 files changed, 196 insertions, 36 deletions
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 0729c88dffa..77237a3c0a3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -62,6 +62,8 @@
#define NVC0_NEW_3D_DRIVERCONST (1 << 27)
#define NVC0_NEW_3D_WINDOW_RECTS (1 << 28)
+#define NVC0_NEW_3D_SAMPLE_LOCATIONS (1 << 29)
+
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
#define NVC0_NEW_CP_TEXTURES (1 << 2)
@@ -134,20 +136,21 @@
#define NVC0_CB_AUX_UBO_SIZE ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4)
/* 8 sets of 32-bits integer pairs sample offsets */
#define NVC0_CB_AUX_SAMPLE_INFO 0x1a0 /* FP */
-#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2)
+/* 256 bytes, though only 64 bytes used before GM200 */
+#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 2 * 4 * 4)
/* draw parameters (index bais, base instance, drawid) */
#define NVC0_CB_AUX_DRAW_INFO 0x1a0 /* VP */
/* 32 user buffers, at 4 32-bits integers each */
-#define NVC0_CB_AUX_BUF_INFO(i) 0x220 + (i) * 4 * 4
+#define NVC0_CB_AUX_BUF_INFO(i) 0x2a0 + (i) * 4 * 4
#define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4)
/* 8 surfaces, at 16 32-bits integers each */
-#define NVC0_CB_AUX_SU_INFO(i) 0x420 + (i) * 16 * 4
+#define NVC0_CB_AUX_SU_INFO(i) 0x4a0 + (i) * 16 * 4
#define NVC0_CB_AUX_SU_SIZE (NVC0_MAX_IMAGES * 16 * 4)
/* 1 64-bits address and 1 32-bits sequence */
-#define NVC0_CB_AUX_MP_INFO 0x620
+#define NVC0_CB_AUX_MP_INFO 0x6a0
#define NVC0_CB_AUX_MP_SIZE 3 * 4
/* 512 64-byte blocks for bindless image handles */
-#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4
+#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
#define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4)
/* 4 32-bits floats for the vertex runout, put at the end */
#define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
@@ -229,6 +232,8 @@ struct nvc0_context {
struct list_head img_head;
struct pipe_framebuffer_state framebuffer;
+ bool sample_locations_enabled;
+ uint8_t sample_locations[2 * 4 * 8];
struct pipe_blend_color blend_colour;
struct pipe_stencil_ref stencil_ref;
struct pipe_poly_stipple stipple;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 9520d984bb3..57d98753f45 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -481,6 +481,9 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
}
}
}
+ /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */
+ if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET)
+ fp->hdr[5] |= 0x30000000;
for (i = 0; i < info->numOutputs; ++i) {
if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 8e9ede0736a..0efa5840207 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -271,6 +271,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
+ case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
return class_3d >= GM200_3D_CLASS;
case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:
return class_3d >= GP100_3D_CLASS;
@@ -319,7 +320,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CONSTBUF0_FLAGS:
case PIPE_CAP_PACKED_UNIFORMS:
case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES:
- case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -546,6 +546,36 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
}
static void
+nvc0_screen_get_sample_pixel_grid(struct pipe_screen *pscreen,
+ unsigned sample_count,
+ unsigned *width, unsigned *height)
+{
+ switch (sample_count) {
+ case 0:
+ case 1:
+ /* this could be 4x4, but the GL state tracker makes it difficult to
+ * create a 1x MSAA texture and smaller grids save CB space */
+ *width = 2;
+ *height = 4;
+ break;
+ case 2:
+ *width = 2;
+ *height = 4;
+ break;
+ case 4:
+ *width = 2;
+ *height = 2;
+ break;
+ case 8:
+ *width = 1;
+ *height = 2;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
nvc0_screen_destroy(struct pipe_screen *pscreen)
{
struct nvc0_screen *screen = nvc0_screen(pscreen);
@@ -871,6 +901,7 @@ nvc0_screen_create(struct nouveau_device *dev)
pscreen->get_param = nvc0_screen_get_param;
pscreen->get_shader_param = nvc0_screen_get_shader_param;
pscreen->get_paramf = nvc0_screen_get_paramf;
+ pscreen->get_sample_pixel_grid = nvc0_screen_get_sample_pixel_grid;
pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info;
pscreen->get_driver_query_group_info = nvc0_screen_get_driver_query_group_info;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 37dbbe66c7c..d9ee62523b9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -854,7 +854,21 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe,
util_copy_framebuffer_state(&nvc0->framebuffer, fb);
- nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SAMPLE_LOCATIONS;
+}
+
+static void
+nvc0_set_sample_locations(struct pipe_context *pipe,
+ size_t size, const uint8_t *locations)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->sample_locations_enabled = size && locations;
+ if (size > sizeof(nvc0->sample_locations))
+ size = sizeof(nvc0->sample_locations);
+ memcpy(nvc0->sample_locations, locations, size);
+
+ nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLE_LOCATIONS;
}
static void
@@ -1407,6 +1421,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_min_samples = nvc0_set_min_samples;
pipe->set_constant_buffer = nvc0_set_constant_buffer;
pipe->set_framebuffer_state = nvc0_set_framebuffer_state;
+ pipe->set_sample_locations = nvc0_set_sample_locations;
pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
pipe->set_scissor_states = nvc0_set_scissor_states;
pipe->set_viewport_states = nvc0_set_viewport_states;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 8e2192d3de2..cc18f41c4bb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -71,13 +71,132 @@ nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
PUSH_DATA (push, 0); // base layer
}
+static uint32_t
+gm200_encode_cb_sample_location(uint8_t x, uint8_t y)
+{
+ static const uint8_t lut[] = {
+ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
+ uint32_t result = 0;
+ /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
+ result |= lut[x] << 8 | lut[y] << 24;
+ /* fill in gaps with data in a representation for SV_SAMPLE_POS */
+ result |= x << 12 | y << 28;
+ return result;
+}
+
+static void
+gm200_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+ unsigned grid_width, grid_height, hw_grid_width;
+ uint8_t sample_locations[16][2];
+ unsigned cb[64];
+ unsigned i, pixel, pixel_y, pixel_x, sample;
+ uint32_t packed_locations[4] = {};
+
+ screen->base.base.get_sample_pixel_grid(
+ &screen->base.base, ms, &grid_width, &grid_height);
+
+ hw_grid_width = grid_width;
+ if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
+ hw_grid_width = 4;
+
+ if (nvc0->sample_locations_enabled) {
+ uint8_t locations[2 * 4 * 8];
+ memcpy(locations, nvc0->sample_locations, sizeof(locations));
+ util_sample_locations_flip_y(
+ &screen->base.base, nvc0->framebuffer.height, ms, locations);
+
+ for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) {
+ for (sample = 0; sample < ms; sample++) {
+ unsigned pixel_x = pixel % hw_grid_width;
+ unsigned pixel_y = pixel / hw_grid_width;
+ unsigned wi = pixel * ms + sample;
+ unsigned ri = (pixel_y * grid_width + pixel_x % grid_width);
+ ri = ri * ms + sample;
+ sample_locations[wi][0] = locations[ri] & 0xf;
+ sample_locations[wi][1] = 16 - (locations[ri] >> 4);
+ }
+ }
+ } else {
+ const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
+ for (i = 0; i < 16; i++) {
+ sample_locations[i][0] = ptr[i % ms][0];
+ sample_locations[i][1] = ptr[i % ms][1];
+ }
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 64);
+ PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
+ for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+ for (pixel_x = 0; pixel_x < 2; pixel_x++) {
+ for (sample = 0; sample < ms; sample++) {
+ unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample;
+ unsigned read_index = pixel_y % grid_height * hw_grid_width;
+ uint8_t x, y;
+ read_index += pixel_x % grid_width;
+ read_index = read_index * ms + sample;
+ x = sample_locations[read_index][0];
+ y = sample_locations[read_index][1];
+ cb[write_index] = gm200_encode_cb_sample_location(x, y);
+ }
+ }
+ }
+ PUSH_DATAp(push, cb, 64);
+
+ for (i = 0; i < 16; i++) {
+ packed_locations[i / 4] |= sample_locations[i][0] << ((i % 4) * 8);
+ packed_locations[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4);
+ }
+
+ BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
+ PUSH_DATAp(push, packed_locations, 4);
+}
+
+static void
+nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+ unsigned i;
+
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
+ PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
+ for (i = 0; i < ms; i++) {
+ float xy[2];
+ nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
+ PUSH_DATAf(push, xy[0]);
+ PUSH_DATAf(push, xy[1]);
+ }
+}
+
+static void
+validate_sample_locations(struct nvc0_context *nvc0)
+{
+ unsigned ms = util_framebuffer_get_num_samples(&nvc0->framebuffer);
+
+ if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
+ gm200_validate_sample_locations(nvc0, ms);
+ else
+ nvc0_validate_sample_locations(nvc0, ms);
+}
+
static void
nvc0_validate_fb(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
- struct nvc0_screen *screen = nvc0->screen;
- unsigned i, ms;
+ unsigned i;
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
unsigned nr_cbufs = fb->nr_cbufs;
bool serialize = false;
@@ -197,33 +316,6 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
- ms = 1 << ms_mode;
- BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, NVC0_CB_AUX_SIZE);
- PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
- PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
- BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
- PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
- for (i = 0; i < ms; i++) {
- float xy[2];
- nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
- PUSH_DATAf(push, xy[0]);
- PUSH_DATAf(push, xy[1]);
- }
-
- if (screen->base.class_3d >= GM200_3D_CLASS) {
- const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
- uint32_t val[4] = {};
-
- for (i = 0; i < 16; i++) {
- val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0);
- val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4);
- }
-
- BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
- PUSH_DATAp(push, val, 4);
- }
-
if (serialize)
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
@@ -879,6 +971,8 @@ validate_list_3d[] = {
NVC0_NEW_3D_TEVLPROG |
NVC0_NEW_3D_GMTYPROG },
{ nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST },
+ { validate_sample_locations, NVC0_NEW_3D_SAMPLE_LOCATIONS |
+ NVC0_NEW_3D_FRAMEBUFFER},
};
bool
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 0f86c11b7f4..39b1369758a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -754,6 +754,16 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers,
}
}
+static void
+gm200_evaluate_depth_buffer(struct pipe_context *pipe)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER);
+ IMMED_NVC0(push, SUBC_3D(0x11fc), 1);
+}
+
/* =============================== BLIT CODE ===================================
*/
@@ -1720,4 +1730,6 @@ nvc0_init_surface_functions(struct nvc0_context *nvc0)
pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
pipe->clear_texture = nv50_clear_texture;
pipe->clear_buffer = nvc0_clear_buffer;
+ if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
+ pipe->evaluate_depth_buffer = gm200_evaluate_depth_buffer;
}