diff options
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state_msaa.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_msaa.c | 168 |
1 files changed, 110 insertions, 58 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c b/src/gallium/drivers/radeonsi/si_state_msaa.c index 8ffe2901970..4d2cc64998e 100644 --- a/src/gallium/drivers/radeonsi/si_state_msaa.c +++ b/src/gallium/drivers/radeonsi/si_state_msaa.c @@ -1,25 +1,7 @@ /* * Copyright 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * SPDX-License-Identifier: MIT */ #include "si_build_pm4.h" @@ -115,10 +97,23 @@ static const uint32_t sample_locs_16x[] = { FILL_SREG(-5, -2, 5, 3, -2, 6, 3, -5), FILL_SREG(-4, -6, 1, 1, -6, 4, 7, -4), FILL_SREG(-1, -3, 6, 7, -3, 2, 0, -7), - FILL_SREG(-7, -8, 2, 5, -8, 0, 4, -1), + /* We use -7 where DX sample locations want -8, which allows us to make + * the PA_SU_PRIM_FILTER_CNTL register immutable. That's a quality compromise + * for underused 16x EQAA. + */ + FILL_SREG(-7, -7 /* DX uses -8 */, 2, 5, -7 /* DX uses -8 */, 0, 4, -1), }; static const uint64_t centroid_priority_16x = 0xc97e64b231d0fa85ull; +/* distance from the pixel center, indexed by log2(nr_samples) */ +unsigned si_msaa_max_distance[5] = { + 0, /* no AA */ + 4, /* 2x MSAA */ + 6, /* 4x MSAA */ + 7, /* 8x MSAA */ + 7, /* 16x MSAA */ +}; + static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count, unsigned sample_index, float *out_value) { @@ -147,55 +142,111 @@ static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_cou out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f; } -static void si_emit_max_4_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority, +static void si_emit_max_4_sample_locs(struct si_context *sctx, uint64_t centroid_priority, uint32_t sample_locs) { - radeon_begin(cs); - radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); - radeon_emit(cs, centroid_priority); - radeon_emit(cs, centroid_priority >> 32); - radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs); - radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs); - radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs); - radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs); - radeon_end(); + if (sctx->screen->info.has_set_context_pairs_packed) { + radeon_begin(&sctx->gfx_cs); + gfx11_begin_packed_context_regs(); + gfx11_set_context_reg(R_028BD4_PA_SC_CENTROID_PRIORITY_0, centroid_priority); + gfx11_set_context_reg(R_028BD8_PA_SC_CENTROID_PRIORITY_1, centroid_priority >> 32); + gfx11_set_context_reg(R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs); + gfx11_set_context_reg(R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs); + gfx11_set_context_reg(R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs); + gfx11_set_context_reg(R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs); + gfx11_end_packed_context_regs(); + radeon_end(); + } else { + radeon_begin(&sctx->gfx_cs); + radeon_set_context_reg_seq(R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); + radeon_emit(centroid_priority); + radeon_emit(centroid_priority >> 32); + radeon_set_context_reg(R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs); + radeon_set_context_reg(R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs); + radeon_set_context_reg(R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs); + radeon_set_context_reg(R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs); + radeon_end(); + } } -static void si_emit_max_16_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority, +static void si_emit_max_16_sample_locs(struct si_context *sctx, uint64_t centroid_priority, const uint32_t *sample_locs, unsigned num_samples) { - radeon_begin(cs); - radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); - radeon_emit(cs, centroid_priority); - radeon_emit(cs, centroid_priority >> 32); - radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, + radeon_begin(&sctx->gfx_cs); + radeon_set_context_reg_seq(R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); + radeon_emit(centroid_priority); + radeon_emit(centroid_priority >> 32); + radeon_set_context_reg_seq(R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, num_samples == 8 ? 14 : 16); - radeon_emit_array(cs, sample_locs, 4); - radeon_emit_array(cs, sample_locs, 4); - radeon_emit_array(cs, sample_locs, 4); - radeon_emit_array(cs, sample_locs, num_samples == 8 ? 2 : 4); + radeon_emit_array(sample_locs, 4); + radeon_emit_array(sample_locs, 4); + radeon_emit_array(sample_locs, 4); + radeon_emit_array(sample_locs, num_samples == 8 ? 2 : 4); radeon_end(); } -void si_emit_sample_locations(struct radeon_cmdbuf *cs, int nr_samples) +static void si_emit_sample_locations(struct si_context *sctx, unsigned index) { - switch (nr_samples) { - default: - case 1: - si_emit_max_4_sample_locs(cs, centroid_priority_1x, sample_locs_1x); - break; - case 2: - si_emit_max_4_sample_locs(cs, centroid_priority_2x, sample_locs_2x); - break; - case 4: - si_emit_max_4_sample_locs(cs, centroid_priority_4x, sample_locs_4x); - break; - case 8: - si_emit_max_16_sample_locs(cs, centroid_priority_8x, sample_locs_8x, 8); - break; - case 16: - si_emit_max_16_sample_locs(cs, centroid_priority_16x, sample_locs_16x, 16); - break; + struct radeon_cmdbuf *cs = &sctx->gfx_cs; + struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; + unsigned nr_samples = sctx->framebuffer.nr_samples; + + /* Smoothing (only possible with nr_samples == 1) uses the same + * sample locations as the MSAA it simulates. + */ + if (nr_samples <= 1 && sctx->smoothing_enabled) + nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; + + /* Always set MSAA sample locations even with 1x MSAA for simplicity. + * + * The only chips that don't need to set them for 1x MSAA are GFX6-8 except Polaris, + * but there is no benefit in not resetting them to 0 when changing framebuffers from MSAA + * to non-MSAA. + */ + if (nr_samples != sctx->sample_locs_num_samples) { + switch (nr_samples) { + default: + case 1: + si_emit_max_4_sample_locs(sctx, centroid_priority_1x, sample_locs_1x); + break; + case 2: + si_emit_max_4_sample_locs(sctx, centroid_priority_2x, sample_locs_2x); + break; + case 4: + si_emit_max_4_sample_locs(sctx, centroid_priority_4x, sample_locs_4x); + break; + case 8: + si_emit_max_16_sample_locs(sctx, centroid_priority_8x, sample_locs_8x, 8); + break; + case 16: + si_emit_max_16_sample_locs(sctx, centroid_priority_16x, sample_locs_16x, 16); + break; + } + sctx->sample_locs_num_samples = nr_samples; + } + + if (sctx->screen->info.has_small_prim_filter_sample_loc_bug) { + /* For hardware with the sample location bug, the problem is that in order to use the small + * primitive filter, we need to explicitly set the sample locations to 0. But the DB doesn't + * properly process the change of sample locations without a flush, and so we can end up + * with incorrect Z values. + * + * Instead of doing a flush, just disable the small primitive filter when MSAA is + * force-disabled. + * + * The alternative of setting sample locations to 0 would require a DB flush to avoid + * Z errors, see https://bugs.freedesktop.org/show_bug.cgi?id=96908 + */ + bool small_prim_filter_enable = sctx->framebuffer.nr_samples <= 1 || rs->multisample_enable; + assert(sctx->family >= CHIP_POLARIS10); + + radeon_begin(cs); + radeon_opt_set_context_reg(sctx, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, + SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, + S_028830_SMALL_PRIM_FILTER_ENABLE(small_prim_filter_enable) | + /* Small line culling doesn't work on Polaris10-12. */ + S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12)); + radeon_end(); } } @@ -203,6 +254,7 @@ void si_init_msaa_functions(struct si_context *sctx) { int i; + sctx->atoms.s.sample_locations.emit = si_emit_sample_locations; sctx->b.get_sample_position = si_get_sample_position; si_get_sample_position(&sctx->b, 1, 0, sctx->sample_positions.x1[0]); |