summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2020-03-26 22:02:13 -0400
committer: Marge Bot <eric+marge@anholt.net> 2020-06-09 16:17:36 +0000
commit: a23802bcb9a42a02d34a5a36d6e66d6532813a0d (patch)
tree: d5e18eb24176d3f05fedef00f06cdd7177351d76
parent: a1602516d77e7cfb1bf97f8c1298a3a346313ff8 (diff)
ac,radeonsi: start adding support for gfx10.3
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5383>
-rw-r--r-- src/amd/common/ac_gpu_info.c 7
-rw-r--r-- src/amd/common/ac_surface.c 2
-rw-r--r-- src/amd/common/amd_family.h 1
-rw-r--r-- src/amd/registers/gfx10.json 41
-rw-r--r-- src/gallium/drivers/radeonsi/si_perfcounter.c 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c 8
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c 17
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 4
8 files changed, 64 insertions, 17 deletions
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index a8a43fdc8ee..517de226bd9 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -582,7 +582,8 @@ bool ac_query_gpu_info(int fd, void *dev_p,
info->family == CHIP_VEGA12 ||
info->family == CHIP_RAVEN ||
info->family == CHIP_RAVEN2 ||
- info->family == CHIP_RENOIR);
+ info->family == CHIP_RENOIR ||
+ info->chip_class >= GFX10_3);
info->has_out_of_order_rast = info->chip_class >= GFX8 &&
info->chip_class <= GFX9 &&
@@ -736,7 +737,9 @@ bool ac_query_gpu_info(int fd, void *dev_p,
if (info->chip_class >= GFX10)
info->num_sdp_interfaces = device_info.num_tcc_blocks;
- if (info->chip_class >= GFX10)
+ if (info->chip_class >= GFX10_3)
+ info->max_wave64_per_simd = 16;
+ else if (info->chip_class == GFX10)
info->max_wave64_per_simd = 20;
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
info->max_wave64_per_simd = 8;
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index cbbd86093e7..d7dd9561f6f 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -2127,6 +2127,7 @@ bool ac_surface_set_umd_metadata(const struct radeon_info *info,
break;
case GFX10:
+ case GFX10_3:
surf->dcc_offset =
((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
surf->u.gfx9.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
@@ -2169,6 +2170,7 @@ void ac_surface_get_umd_metadata(const struct radeon_info *info,
desc[5] |= S_008F24_META_DATA_ADDRESS(surf->dcc_offset >> 40);
break;
case GFX10:
+ case GFX10_3:
desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->dcc_offset >> 8);
desc[7] = surf->dcc_offset >> 16;
diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h
index ffcc1bd9240..8262a3a40b7 100644
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -119,6 +119,7 @@ enum chip_class {
GFX8,
GFX9,
GFX10,
+ GFX10_3,
};
enum ring_type {
diff --git a/src/amd/registers/gfx10.json b/src/amd/registers/gfx10.json
index 08f111c87c7..5c2b251ca43 100644
--- a/src/amd/registers/gfx10.json
+++ b/src/amd/registers/gfx10.json
@@ -16372,6 +16372,12 @@
},
{
"chips": ["gfx10"],
+ "map": {"at": 165712, "to": "mm"},
+ "name": "SX_PS_DOWNCONVERT_CONTROL_GFX103",
+ "type_ref": "SX_PS_DOWNCONVERT_CONTROL"
+ },
+ {
+ "chips": ["gfx10"],
"map": {"at": 165716, "to": "mm"},
"name": "SX_PS_DOWNCONVERT",
"type_ref": "SX_PS_DOWNCONVERT"
@@ -17248,7 +17254,9 @@
{"bits": [14, 17], "name": "LOSSY_ALPHA_PRECISION"},
{"bits": [18, 18], "name": "DISABLE_CONSTANT_ENCODE_REG"},
{"bits": [19, 19], "name": "ENABLE_CONSTANT_ENCODE_REG_WRITE"},
- {"bits": [20, 20], "name": "INDEPENDENT_128B_BLOCKS"}
+ {"bits": [20, 20], "name": "INDEPENDENT_128B_BLOCKS"},
+ {"bits": [21, 21], "name": "SKIP_LOW_COMP_RATIO_GFX103"},
+ {"bits": [22, 22], "name": "DCC_COMPRESS_DISABLE_GFX103"}
]
},
"CB_COLOR0_INFO": {
@@ -18715,7 +18723,8 @@
{"bits": [21, 21], "name": "PRESERVE_ZRANGE"},
{"bits": [22, 22], "name": "PRESERVE_SRESULTS"},
{"bits": [23, 23], "name": "DISABLE_FAST_PASS"},
- {"bits": [25, 25], "name": "ALLOW_PARTIAL_RES_HIER_KILL"}
+ {"bits": [25, 25], "name": "ALLOW_PARTIAL_RES_HIER_KILL"},
+ {"bits": [27, 28], "name": "CENTROID_COMPUTATION_MODE_GFX103"}
]
},
"DB_RMI_L2_CACHE_CONTROL": {
@@ -19426,7 +19435,8 @@
"PA_CL_NGG_CNTL": {
"fields": [
{"bits": [0, 0], "name": "VERTEX_REUSE_OFF"},
- {"bits": [1, 1], "name": "INDEX_BUF_EDGE_FLAG_ENA"}
+ {"bits": [1, 1], "name": "INDEX_BUF_EDGE_FLAG_ENA"},
+ {"bits": [2, 9], "name": "VERTEX_REUSE_DEPTH_GFX103"}
]
},
"PA_CL_OBJPRIM_ID_CNTL": {
@@ -19493,8 +19503,9 @@
{"bits": [23, 23], "name": "VS_OUT_CCDIST1_VEC_ENA"},
{"bits": [24, 24], "name": "VS_OUT_MISC_SIDE_BUS_ENA"},
{"bits": [25, 25], "name": "USE_VTX_GS_CUT_FLAG"},
- {"bits": [26, 26], "name": "USE_VTX_SHD_OBJPRIM_ID"},
- {"bits": [27, 27], "name": "USE_VTX_LINE_WIDTH"}
+ {"bits": [27, 27], "name": "USE_VTX_LINE_WIDTH"},
+ {"bits": [29, 29], "name": "BYPASS_VTX_RATE_COMBINER_GFX103"},
+ {"bits": [30, 30], "name": "BYPASS_PRIM_RATE_COMBINER_GFX103"}
]
},
"PA_CL_VTE_CNTL": {
@@ -19540,7 +19551,9 @@
{"bits": [13, 16], "name": "MAX_SAMPLE_DIST"},
{"bits": [20, 22], "name": "MSAA_EXPOSED_SAMPLES"},
{"bits": [24, 25], "name": "DETAIL_TO_EXPOSED_MODE"},
- {"bits": [26, 27], "enum_ref": "CovToShaderSel", "name": "COVERAGE_TO_SHADER_SELECT"}
+ {"bits": [26, 27], "enum_ref": "CovToShaderSel", "name": "COVERAGE_TO_SHADER_SELECT"},
+ {"bits": [28, 28], "name": "SAMPLE_COVERAGE_ENCODING_GFX103"},
+ {"bits": [29, 29], "name": "COVERED_CENTROID_IS_CENTER_GFX103"}
]
},
"PA_SC_AA_MASK_X0Y0_X1Y0": {
@@ -21581,6 +21594,18 @@
{"bits": [10, 19], "name": "PERFCOUNTER_SELECT3"}
]
},
+ "SX_PS_DOWNCONVERT_CONTROL": {
+ "fields": [
+ {"bits": [0, 0], "name": "MRT0_FMT_MAPPING_DISABLE"},
+ {"bits": [1, 1], "name": "MRT1_FMT_MAPPING_DISABLE"},
+ {"bits": [2, 2], "name": "MRT2_FMT_MAPPING_DISABLE"},
+ {"bits": [3, 3], "name": "MRT3_FMT_MAPPING_DISABLE"},
+ {"bits": [4, 4], "name": "MRT4_FMT_MAPPING_DISABLE"},
+ {"bits": [5, 5], "name": "MRT5_FMT_MAPPING_DISABLE"},
+ {"bits": [6, 6], "name": "MRT6_FMT_MAPPING_DISABLE"},
+ {"bits": [7, 7], "name": "MRT7_FMT_MAPPING_DISABLE"}
+ ]
+ },
"SX_PS_DOWNCONVERT": {
"fields": [
{"bits": [0, 3], "enum_ref": "SX_DOWNCONVERT_FORMAT", "name": "MRT0"},
@@ -21836,7 +21861,9 @@
"VGT_HS_OFFCHIP_PARAM_UMD": {
"fields": [
{"bits": [0, 8], "name": "OFFCHIP_BUFFERING"},
- {"bits": [9, 10], "name": "OFFCHIP_GRANULARITY"}
+ {"bits": [9, 10], "name": "OFFCHIP_GRANULARITY"},
+ {"bits": [0, 9], "name": "OFFCHIP_BUFFERING_GFX103"},
+ {"bits": [10, 11], "name": "OFFCHIP_GRANULARITY_GFX103"}
]
},
"VGT_INSTANCE_BASE_ID": {
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index d6b3fc85767..8825926064d 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -1438,6 +1438,7 @@ void si_init_perfcounters(struct si_screen *screen)
num_blocks = ARRAY_SIZE(groups_gfx9);
break;
case GFX10:
+ case GFX10_3:
blocks = groups_gfx10;
num_blocks = ARRAY_SIZE(groups_gfx10);
break;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 1d14442b445..7fdbfa24c57 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1088,7 +1088,11 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4;
- if (sscreen->info.chip_class >= GFX7) {
+ if (sscreen->info.chip_class >= GFX10_3) {
+ sscreen->vgt_hs_offchip_param =
+ S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
+ S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
+ } else if (sscreen->info.chip_class >= GFX7) {
if (sscreen->info.chip_class >= GFX8)
--max_offchip_buffers;
sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
@@ -1125,7 +1129,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
/* Only enable primitive binning on APUs by default. */
if (sscreen->info.chip_class >= GFX10) {
sscreen->dpbb_allowed = true;
- sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram;
+ /* DFSM is not supported on GFX 10.3 and not beneficial on Navi1x. */
} else if (sscreen->info.chip_class == GFX9) {
sscreen->dpbb_allowed = !sscreen->info.has_dedicated_vram;
sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index ecce673caf2..b59f28e028d 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -757,8 +757,9 @@ static void si_emit_clip_regs(struct si_context *sctx)
unsigned initial_cdw = sctx->gfx_cs->current.cdw;
unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | clipdist_mask |
- (culldist_mask << 8);
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+ S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
+ clipdist_mask | (culldist_mask << 8);
if (sctx->chip_class >= GFX10) {
radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
@@ -1384,8 +1385,9 @@ static void si_emit_db_render_state(struct si_context *sctx)
radeon_opt_set_context_reg(
sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
- S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
- S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
+ S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
+ S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
+ S_028010_CENTROID_COMPUTATION_MODE_GFX103(sctx->chip_class >= GFX10_3 ? 2 : 0));
db_shader_control = sctx->ps_db_shader_control;
@@ -3535,7 +3537,8 @@ static void si_emit_msaa_config(struct si_context *sctx)
sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
- S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples);
+ S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
+ S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(sctx->chip_class >= GFX10_3);
if (sctx->framebuffer.nr_samples > 1) {
db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
@@ -5329,6 +5332,7 @@ static void si_init_config(struct si_context *sctx)
* a single primitive shader subgroup.
*/
si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
+ /* Reuse for legacy (non-NGG) only. */
si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
if (!has_clear_state) {
@@ -5370,6 +5374,9 @@ static void si_init_config(struct si_context *sctx)
S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
}
+ if (sctx->chip_class >= GFX10_3) {
+ si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff);
+ }
if (sctx->chip_class >= GFX9) {
si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 0fd1714f8f8..520eeada9e9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1217,7 +1217,9 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
* this.
*/
shader->ctx_reg.ngg.pa_cl_ngg_cntl =
- S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX);
+ S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX) |
+ /* Reuse for NGG. */
+ S_028838_VERTEX_REUSE_DEPTH_GFX103(sscreen->info.chip_class >= GFX10_3 ? 30 : 0);
shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
/* Oversubscribe PC. This improves performance when there are too many varyings. */