summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/radeonsi/si_state_msaa.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state_msaa.c')
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_msaa.c 168
1 files changed, 110 insertions, 58 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c b/src/gallium/drivers/radeonsi/si_state_msaa.c
index 8ffe2901970..4d2cc64998e 100644
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -1,25 +1,7 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
- * All Rights Reserved.
*
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * SPDX-License-Identifier: MIT
*/
#include "si_build_pm4.h"
@@ -115,10 +97,23 @@ static const uint32_t sample_locs_16x[] = {
FILL_SREG(-5, -2, 5, 3, -2, 6, 3, -5),
FILL_SREG(-4, -6, 1, 1, -6, 4, 7, -4),
FILL_SREG(-1, -3, 6, 7, -3, 2, 0, -7),
- FILL_SREG(-7, -8, 2, 5, -8, 0, 4, -1),
+ /* We use -7 where the DX sample locations want -8, which allows us to make
+ * the PA_SU_SMALL_PRIM_FILTER_CNTL register immutable. That's a quality
+ * compromise for the rarely used 16x EQAA.
+ */
+ FILL_SREG(-7, -7 /* DX uses -8 */, 2, 5, -7 /* DX uses -8 */, 0, 4, -1),
};
static const uint64_t centroid_priority_16x = 0xc97e64b231d0fa85ull;
+/* Maximum distance of a sample from the pixel center, indexed by log2(nr_samples). */
+unsigned si_msaa_max_distance[5] = {
+ 0, /* no AA */
+ 4, /* 2x MSAA */
+ 6, /* 4x MSAA */
+ 7, /* 8x MSAA */
+ 7, /* 16x MSAA */
+};
+
static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
unsigned sample_index, float *out_value)
{
@@ -147,55 +142,111 @@ static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_cou
out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f;
}
-static void si_emit_max_4_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority,
+static void si_emit_max_4_sample_locs(struct si_context *sctx, uint64_t centroid_priority,
uint32_t sample_locs)
{
- radeon_begin(cs);
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, centroid_priority);
- radeon_emit(cs, centroid_priority >> 32);
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs);
- radeon_end();
+ if (sctx->screen->info.has_set_context_pairs_packed) {
+ radeon_begin(&sctx->gfx_cs);
+ gfx11_begin_packed_context_regs();
+ gfx11_set_context_reg(R_028BD4_PA_SC_CENTROID_PRIORITY_0, centroid_priority);
+ gfx11_set_context_reg(R_028BD8_PA_SC_CENTROID_PRIORITY_1, centroid_priority >> 32);
+ gfx11_set_context_reg(R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs);
+ gfx11_set_context_reg(R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs);
+ gfx11_set_context_reg(R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs);
+ gfx11_set_context_reg(R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs);
+ gfx11_end_packed_context_regs();
+ radeon_end();
+ } else {
+ radeon_begin(&sctx->gfx_cs);
+ radeon_set_context_reg_seq(R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(centroid_priority);
+ radeon_emit(centroid_priority >> 32);
+ radeon_set_context_reg(R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs);
+ radeon_set_context_reg(R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs);
+ radeon_set_context_reg(R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs);
+ radeon_set_context_reg(R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs);
+ radeon_end();
+ }
}
-static void si_emit_max_16_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority,
+static void si_emit_max_16_sample_locs(struct si_context *sctx, uint64_t centroid_priority,
const uint32_t *sample_locs, unsigned num_samples)
{
- radeon_begin(cs);
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, centroid_priority);
- radeon_emit(cs, centroid_priority >> 32);
- radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
+ radeon_begin(&sctx->gfx_cs);
+ radeon_set_context_reg_seq(R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(centroid_priority);
+ radeon_emit(centroid_priority >> 32);
+ radeon_set_context_reg_seq(R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
num_samples == 8 ? 14 : 16);
- radeon_emit_array(cs, sample_locs, 4);
- radeon_emit_array(cs, sample_locs, 4);
- radeon_emit_array(cs, sample_locs, 4);
- radeon_emit_array(cs, sample_locs, num_samples == 8 ? 2 : 4);
+ radeon_emit_array(sample_locs, 4);
+ radeon_emit_array(sample_locs, 4);
+ radeon_emit_array(sample_locs, 4);
+ radeon_emit_array(sample_locs, num_samples == 8 ? 2 : 4);
radeon_end();
}
-void si_emit_sample_locations(struct radeon_cmdbuf *cs, int nr_samples)
+static void si_emit_sample_locations(struct si_context *sctx, unsigned index)
{
- switch (nr_samples) {
- default:
- case 1:
- si_emit_max_4_sample_locs(cs, centroid_priority_1x, sample_locs_1x);
- break;
- case 2:
- si_emit_max_4_sample_locs(cs, centroid_priority_2x, sample_locs_2x);
- break;
- case 4:
- si_emit_max_4_sample_locs(cs, centroid_priority_4x, sample_locs_4x);
- break;
- case 8:
- si_emit_max_16_sample_locs(cs, centroid_priority_8x, sample_locs_8x, 8);
- break;
- case 16:
- si_emit_max_16_sample_locs(cs, centroid_priority_16x, sample_locs_16x, 16);
- break;
+ struct radeon_cmdbuf *cs = &sctx->gfx_cs;
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ unsigned nr_samples = sctx->framebuffer.nr_samples;
+
+ /* Smoothing (only possible with nr_samples == 1) uses the same
+ * sample locations as the MSAA it simulates.
+ */
+ if (nr_samples <= 1 && sctx->smoothing_enabled)
+ nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
+
+ /* Always set MSAA sample locations even with 1x MSAA for simplicity.
+ *
+ * The only chips that don't need to set them for 1x MSAA are GFX6-8 except Polaris,
+ * but there is no benefit in not resetting them to 0 when changing framebuffers from MSAA
+ * to non-MSAA.
+ */
+ if (nr_samples != sctx->sample_locs_num_samples) {
+ switch (nr_samples) {
+ default:
+ case 1:
+ si_emit_max_4_sample_locs(sctx, centroid_priority_1x, sample_locs_1x);
+ break;
+ case 2:
+ si_emit_max_4_sample_locs(sctx, centroid_priority_2x, sample_locs_2x);
+ break;
+ case 4:
+ si_emit_max_4_sample_locs(sctx, centroid_priority_4x, sample_locs_4x);
+ break;
+ case 8:
+ si_emit_max_16_sample_locs(sctx, centroid_priority_8x, sample_locs_8x, 8);
+ break;
+ case 16:
+ si_emit_max_16_sample_locs(sctx, centroid_priority_16x, sample_locs_16x, 16);
+ break;
+ }
+ sctx->sample_locs_num_samples = nr_samples;
+ }
+
+ if (sctx->screen->info.has_small_prim_filter_sample_loc_bug) {
+ /* For hardware with the sample location bug, the problem is that in order to use the small
+ * primitive filter, we need to explicitly set the sample locations to 0. But the DB doesn't
+ * properly process the change of sample locations without a flush, and so we can end up
+ * with incorrect Z values.
+ *
+ * Instead of doing a flush, just disable the small primitive filter when MSAA is
+ * force-disabled.
+ *
+ * The alternative of setting sample locations to 0 would require a DB flush to avoid
+ * Z errors, see https://bugs.freedesktop.org/show_bug.cgi?id=96908
+ */
+ bool small_prim_filter_enable = sctx->framebuffer.nr_samples <= 1 || rs->multisample_enable;
+ assert(sctx->family >= CHIP_POLARIS10);
+
+ radeon_begin(cs);
+ radeon_opt_set_context_reg(sctx, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
+ SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
+ S_028830_SMALL_PRIM_FILTER_ENABLE(small_prim_filter_enable) |
+ /* Small line culling doesn't work on Polaris10-12. */
+ S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12));
+ radeon_end();
}
}
@@ -203,6 +254,7 @@ void si_init_msaa_functions(struct si_context *sctx)
{
int i;
+ sctx->atoms.s.sample_locations.emit = si_emit_sample_locations;
sctx->b.get_sample_position = si_get_sample_position;
si_get_sample_position(&sctx->b, 1, 0, sctx->sample_positions.x1[0]);