summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2016-04-13 13:50:04 +0200
committer: Marek Olšák <marek.olsak@amd.com> 2016-04-14 17:00:14 +0200
commit: 51c4034f9b344919839cd0bfaa50624a17b61ae5 (patch)
tree: 080e23ac9c680f47db98d3f67d43635eba5d40bb /src
parent: aaf5be4a29b4537b7e298c3ddf889180f3b4d855 (diff)
radeonsi: fix SGPRS calculation once more
This fixes GS piglit failures after adding SI_PARAM_SHADER_BUFFERS, which bumped NUM_USER_SGPRS and uncovered this bug on SI.

If this were fixed in LLVM, these workarounds wouldn't be needed. LLVM would have to look at the calling convention to know how many SGPR inputs are declared, and add VCC and the scratch wave offset (which is enabled even if we spill SGPRs but not VGPRs, oh well).

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 11
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 67
2 files changed, 23 insertions, 55 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c58467ddcb0..e180fc89d51 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6761,6 +6761,16 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
return true;
}
+static void si_fix_num_sgprs(struct si_shader *shader) /* Raise shader->config.num_sgprs to the minimum computed below; an already larger count is kept. */
+{
+ unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* declared input SGPRs plus 2 SGPRs reserved for VCC */
+
+ if (shader->config.scratch_bytes_per_wave) /* only when the shader has a non-zero scratch size per wave */
+ min_sgprs += 1; /* scratch wave offset */
+
+ shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs); /* workaround: the compiler-reported count may omit the inputs/VCC/scratch offset (see commit message) */
+}
+
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader,
struct pipe_debug_callback *debug)
@@ -6850,6 +6860,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
}
}
+ si_fix_num_sgprs(shader);
si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
stderr);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index b7ebb48e6a9..1ce7ecc2cf3 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -306,7 +306,7 @@ static void si_set_tesseval_regs(struct si_shader *shader,
static void si_shader_ls(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
@@ -322,18 +322,12 @@ static void si_shader_ls(struct si_shader *shader)
vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
num_user_sgprs = SI_LS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- if (num_user_sgprs > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2;
- }
- assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B528_SGPRS((num_sgprs - 1) / 8) |
+ S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B528_DX10_CLAMP(1) |
S_00B528_FLOAT_MODE(shader->config.float_mode);
@@ -344,7 +338,7 @@ static void si_shader_ls(struct si_shader *shader)
static void si_shader_hs(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
uint64_t va;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -355,20 +349,12 @@ static void si_shader_hs(struct si_shader *shader)
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
num_user_sgprs = SI_TCS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with tessellation factor
- * buffer offset. */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B428_SGPRS((num_sgprs - 1) / 8) |
+ S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B428_DX10_CLAMP(1) |
S_00B428_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
@@ -379,7 +365,7 @@ static void si_shader_hs(struct si_shader *shader)
static void si_shader_es(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
@@ -400,21 +386,13 @@ static void si_shader_es(struct si_shader *shader)
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
shader->selector->esgs_itemsize / 4);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B328_SGPRS((num_sgprs - 1) / 8) |
+ S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B328_DX10_CLAMP(1) |
S_00B328_FLOAT_MODE(shader->config.float_mode));
@@ -458,7 +436,7 @@ static void si_shader_gs(struct si_shader *shader)
unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
uint64_t va;
unsigned max_stream = shader->selector->max_gs_stream;
@@ -495,17 +473,10 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
num_user_sgprs = SI_GS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
- if ((num_user_sgprs + 2) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2 + 2;
- }
- assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B228_SGPRS((num_sgprs - 1) / 8) |
+ S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B228_DX10_CLAMP(1) |
S_00B228_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
@@ -523,7 +494,7 @@ static void si_shader_gs(struct si_shader *shader)
static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned nparams, vgpr_comp_cnt;
uint64_t va;
unsigned window_space =
@@ -566,13 +537,6 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->config.num_sgprs;
- if (num_user_sgprs > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2;
- }
- assert(num_sgprs <= 104);
-
/* VS is required to export at least one param. */
nparams = MAX2(shader->info.nr_param_exports, 1);
si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
@@ -594,7 +558,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B128_SGPRS((num_sgprs - 1) / 8) |
+ S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B128_DX10_CLAMP(1) |
S_00B128_FLOAT_MODE(shader->config.float_mode));
@@ -684,7 +648,7 @@ static void si_shader_ps(struct si_shader *shader)
struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
bool has_centroid;
@@ -772,17 +736,10 @@ static void si_shader_ps(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
num_user_sgprs = SI_PS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B028_SGPRS((num_sgprs - 1) / 8) |
+ S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B028_DX10_CLAMP(1) |
S_00B028_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,