diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2018-06-14 19:56:28 -0600 |
---|---|---|
committer | Brian Paul <brianp@vmware.com> | 2018-06-14 20:09:45 -0600 |
commit | 66ca7e400b8cf736943feddafef7f76adabf9120 (patch) | |
tree | a338e1c9fd3ced8b144bedbbebcba8fd52c91135 /src/gallium/drivers/nouveau/nvc0 | |
parent | 9f217facbde04dd005b3f6b53bc97480b856d246 (diff) |
nvc0: add support for programmable sample locations
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Diffstat (limited to 'src/gallium/drivers/nouveau/nvc0')
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 15 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 33 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 17 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 152 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 12 |
6 files changed, 196 insertions, 36 deletions
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 0729c88dffa..77237a3c0a3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -62,6 +62,8 @@ #define NVC0_NEW_3D_DRIVERCONST (1 << 27) #define NVC0_NEW_3D_WINDOW_RECTS (1 << 28) +#define NVC0_NEW_3D_SAMPLE_LOCATIONS (1 << 29) + #define NVC0_NEW_CP_PROGRAM (1 << 0) #define NVC0_NEW_CP_SURFACES (1 << 1) #define NVC0_NEW_CP_TEXTURES (1 << 2) @@ -134,20 +136,21 @@ #define NVC0_CB_AUX_UBO_SIZE ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4) /* 8 sets of 32-bits integer pairs sample offsets */ #define NVC0_CB_AUX_SAMPLE_INFO 0x1a0 /* FP */ -#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2) +/* 256 bytes, though only 64 bytes used before GM200 */ +#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 2 * 4 * 4) /* draw parameters (index bais, base instance, drawid) */ #define NVC0_CB_AUX_DRAW_INFO 0x1a0 /* VP */ /* 32 user buffers, at 4 32-bits integers each */ -#define NVC0_CB_AUX_BUF_INFO(i) 0x220 + (i) * 4 * 4 +#define NVC0_CB_AUX_BUF_INFO(i) 0x2a0 + (i) * 4 * 4 #define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4) /* 8 surfaces, at 16 32-bits integers each */ -#define NVC0_CB_AUX_SU_INFO(i) 0x420 + (i) * 16 * 4 +#define NVC0_CB_AUX_SU_INFO(i) 0x4a0 + (i) * 16 * 4 #define NVC0_CB_AUX_SU_SIZE (NVC0_MAX_IMAGES * 16 * 4) /* 1 64-bits address and 1 32-bits sequence */ -#define NVC0_CB_AUX_MP_INFO 0x620 +#define NVC0_CB_AUX_MP_INFO 0x6a0 #define NVC0_CB_AUX_MP_SIZE 3 * 4 /* 512 64-byte blocks for bindless image handles */ -#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4 +#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4 #define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4) /* 4 32-bits floats for the vertex runout, put at the end */ #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6) @@ -229,6 +232,8 @@ struct nvc0_context { struct list_head img_head; struct pipe_framebuffer_state framebuffer; + bool sample_locations_enabled; + uint8_t sample_locations[2 * 4 * 8]; struct pipe_blend_color blend_colour; struct pipe_stencil_ref stencil_ref; struct pipe_poly_stipple stipple; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 9520d984bb3..57d98753f45 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -481,6 +481,9 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) } } } + /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */ + if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET) + fp->hdr[5] |= 0x30000000; for (i = 0; i < info->numOutputs; ++i) { if (info->out[i].sn == TGSI_SEMANTIC_COLOR) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 8e9ede0736a..0efa5840207 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -271,6 +271,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES: case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES: case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: + case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: return class_3d >= GM200_3D_CLASS; case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: return class_3d >= GP100_3D_CLASS; @@ -319,7 +320,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONSTBUF0_FLAGS: case PIPE_CAP_PACKED_UNIFORMS: case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: - case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: return 0; case PIPE_CAP_VENDOR_ID: @@ -546,6 +546,36 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen, } static void +nvc0_screen_get_sample_pixel_grid(struct pipe_screen *pscreen, + unsigned sample_count, + unsigned *width, unsigned *height) +{ + switch (sample_count) { + case 0: + case 1: + /* this could be 4x4, but the GL state tracker makes it difficult to + * create a 1x MSAA texture and smaller grids save CB space */ + *width = 2; + *height = 4; + break; + case 2: + *width = 2; + *height = 4; + break; + case 4: + *width = 2; + *height = 2; + break; + case 8: + *width = 1; + *height = 2; + break; + default: + assert(0); + } +} + +static void nvc0_screen_destroy(struct pipe_screen *pscreen) { struct nvc0_screen *screen = nvc0_screen(pscreen); @@ -871,6 +901,7 @@ nvc0_screen_create(struct nouveau_device *dev) pscreen->get_param = nvc0_screen_get_param; pscreen->get_shader_param = nvc0_screen_get_shader_param; pscreen->get_paramf = nvc0_screen_get_paramf; + pscreen->get_sample_pixel_grid = nvc0_screen_get_sample_pixel_grid; pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info; pscreen->get_driver_query_group_info = nvc0_screen_get_driver_query_group_info; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 37dbbe66c7c..d9ee62523b9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -854,7 +854,21 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe, util_copy_framebuffer_state(&nvc0->framebuffer, fb); - nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SAMPLE_LOCATIONS; +} + +static void +nvc0_set_sample_locations(struct pipe_context *pipe, + size_t size, const uint8_t *locations) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->sample_locations_enabled = size && locations; + if (size > sizeof(nvc0->sample_locations)) + size = sizeof(nvc0->sample_locations); + memcpy(nvc0->sample_locations, locations, size); + + nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLE_LOCATIONS; } static void @@ -1407,6 +1421,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) pipe->set_min_samples = nvc0_set_min_samples; pipe->set_constant_buffer = nvc0_set_constant_buffer; pipe->set_framebuffer_state = nvc0_set_framebuffer_state; + pipe->set_sample_locations = nvc0_set_sample_locations; pipe->set_polygon_stipple = nvc0_set_polygon_stipple; pipe->set_scissor_states = nvc0_set_scissor_states; pipe->set_viewport_states = nvc0_set_viewport_states; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 8e2192d3de2..cc18f41c4bb 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -71,13 +71,132 @@ nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers) PUSH_DATA (push, 0); // base layer } +static uint32_t +gm200_encode_cb_sample_location(uint8_t x, uint8_t y) +{ + static const uint8_t lut[] = { + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7}; + uint32_t result = 0; + /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */ + result |= lut[x] << 8 | lut[y] << 24; + /* fill in gaps with data in a representation for SV_SAMPLE_POS */ + result |= x << 12 | y << 28; + return result; +} + +static void +gm200_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + unsigned grid_width, grid_height, hw_grid_width; + uint8_t sample_locations[16][2]; + unsigned cb[64]; + unsigned i, pixel, pixel_y, pixel_x, sample; + uint32_t packed_locations[4] = {}; + + screen->base.base.get_sample_pixel_grid( + &screen->base.base, ms, &grid_width, &grid_height); + + hw_grid_width = grid_width; + if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */ + hw_grid_width = 4; + + if (nvc0->sample_locations_enabled) { + uint8_t locations[2 * 4 * 8]; + memcpy(locations, nvc0->sample_locations, sizeof(locations)); + util_sample_locations_flip_y( + &screen->base.base, nvc0->framebuffer.height, ms, locations); + + for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) { + for (sample = 0; sample < ms; sample++) { + unsigned pixel_x = pixel % hw_grid_width; + unsigned pixel_y = pixel / hw_grid_width; + unsigned wi = pixel * ms + sample; + unsigned ri = (pixel_y * grid_width + pixel_x % grid_width); + ri = ri * ms + sample; + sample_locations[wi][0] = locations[ri] & 0xf; + sample_locations[wi][1] = 16 - (locations[ri] >> 4); + } + } + } else { + const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms); + for (i = 0; i < 16; i++) { + sample_locations[i][0] = ptr[i % ms][0]; + sample_locations[i][1] = ptr[i % ms][1]; + } + } + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 64); + PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); + for (pixel_y = 0; pixel_y < 4; pixel_y++) { + for (pixel_x = 0; pixel_x < 2; pixel_x++) { + for (sample = 0; sample < ms; sample++) { + unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample; + unsigned read_index = pixel_y % grid_height * hw_grid_width; + uint8_t x, y; + read_index += pixel_x % grid_width; + read_index = read_index * ms + sample; + x = sample_locations[read_index][0]; + y = sample_locations[read_index][1]; + cb[write_index] = gm200_encode_cb_sample_location(x, y); + } + } + } + PUSH_DATAp(push, cb, 64); + + for (i = 0; i < 16; i++) { + packed_locations[i / 4] |= sample_locations[i][0] << ((i % 4) * 8); + packed_locations[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4); + } + + BEGIN_NVC0(push, SUBC_3D(0x11e0), 4); + PUSH_DATAp(push, packed_locations, 4); +} + +static void +nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + unsigned i; + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); + PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); + for (i = 0; i < ms; i++) { + float xy[2]; + nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy); + PUSH_DATAf(push, xy[0]); + PUSH_DATAf(push, xy[1]); + } +} + +static void +validate_sample_locations(struct nvc0_context *nvc0) +{ + unsigned ms = util_framebuffer_get_num_samples(&nvc0->framebuffer); + + if (nvc0->screen->base.class_3d >= GM200_3D_CLASS) + gm200_validate_sample_locations(nvc0, ms); + else + nvc0_validate_sample_locations(nvc0, ms); +} + static void nvc0_validate_fb(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; - struct nvc0_screen *screen = nvc0->screen; - unsigned i, ms; + unsigned i; unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; unsigned nr_cbufs = fb->nr_cbufs; bool serialize = false; @@ -197,33 +316,6 @@ nvc0_validate_fb(struct nvc0_context *nvc0) PUSH_DATA (push, (076543210 << 4) | nr_cbufs); IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode); - ms = 1 << ms_mode; - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, NVC0_CB_AUX_SIZE); - PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); - PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); - BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); - PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); - for (i = 0; i < ms; i++) { - float xy[2]; - nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy); - PUSH_DATAf(push, xy[0]); - PUSH_DATAf(push, xy[1]); - } - - if (screen->base.class_3d >= GM200_3D_CLASS) { - const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms); - uint32_t val[4] = {}; - - for (i = 0; i < 16; i++) { - val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0); - val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4); - } - - BEGIN_NVC0(push, SUBC_3D(0x11e0), 4); - PUSH_DATAp(push, val, 4); - } - if (serialize) IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); @@ -879,6 +971,8 @@ validate_list_3d[] = { NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG }, { nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST }, + { validate_sample_locations, NVC0_NEW_3D_SAMPLE_LOCATIONS | + NVC0_NEW_3D_FRAMEBUFFER}, }; bool diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 0f86c11b7f4..39b1369758a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -754,6 +754,16 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers, } } +static void +gm200_evaluate_depth_buffer(struct pipe_context *pipe) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER); + IMMED_NVC0(push, SUBC_3D(0x11fc), 1); +} + /* =============================== BLIT CODE =================================== */ @@ -1720,4 +1730,6 @@ nvc0_init_surface_functions(struct nvc0_context *nvc0) pipe->clear_depth_stencil = nvc0_clear_depth_stencil; pipe->clear_texture = nv50_clear_texture; pipe->clear_buffer = nvc0_clear_buffer; + if (nvc0->screen->base.class_3d >= GM200_3D_CLASS) + pipe->evaluate_depth_buffer = gm200_evaluate_depth_buffer; } |