summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>2019-07-01 02:19:13 +0200
committerBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>2019-07-04 10:52:26 +0000
commit5ff651c0a7cabcd1b7c348f3a3509aa6c6e406b7 (patch)
treebbecab2b2609edcc84b5020eb5849b6e06aa0d5d
parent726a31df705bba61b91152a84bd0abaea8418768 (diff)
radv: Move more stuff to variant create time.
Due to them depending on the linker result. Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
-rw-r--r--src/amd/vulkan/radv_nir_to_llvm.c58
-rw-r--r--src/amd/vulkan/radv_shader.c68
2 files changed, 65 insertions, 61 deletions
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 9c11f605535..934d98bcc8f 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3886,70 +3886,12 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
fprintf(stderr, "compile failed\n");
}
- if (options->dump_shader)
- fprintf(stderr, "disasm:\n%s\n", binary.disasm_string);
-
ac_shader_binary_read_config(&binary, &config, 0, options->supports_spill);
LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
LLVMDisposeModule(llvm_module);
LLVMContextDispose(ctx);
- if (stage == MESA_SHADER_FRAGMENT) {
- shader_info->num_input_vgprs = 0;
- if (G_0286CC_PERSP_SAMPLE_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 2;
- if (G_0286CC_PERSP_CENTER_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 2;
- if (G_0286CC_PERSP_CENTROID_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 2;
- if (G_0286CC_PERSP_PULL_MODEL_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 3;
- if (G_0286CC_LINEAR_SAMPLE_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 2;
- if (G_0286CC_LINEAR_CENTER_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 2;
- if (G_0286CC_LINEAR_CENTROID_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 2;
- if (G_0286CC_LINE_STIPPLE_TEX_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 1;
- if (G_0286CC_POS_X_FLOAT_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 1;
- if (G_0286CC_POS_Y_FLOAT_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 1;
- if (G_0286CC_POS_Z_FLOAT_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 1;
- if (G_0286CC_POS_W_FLOAT_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 1;
- if (G_0286CC_FRONT_FACE_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 1;
- if (G_0286CC_ANCILLARY_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 1;
- if (G_0286CC_SAMPLE_COVERAGE_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 1;
- if (G_0286CC_POS_FIXED_PT_ENA(config.spi_ps_input_addr))
- shader_info->num_input_vgprs += 1;
- }
- config.num_vgprs = MAX2(config.num_vgprs, shader_info->num_input_vgprs);
-
- /* +3 for scratch wave offset and VCC */
- config.num_sgprs = MAX2(config.num_sgprs,
- shader_info->num_input_sgprs + 3);
-
- /* Enable 64-bit and 16-bit denormals, because there is no performance
- * cost.
- *
- * If denormals are enabled, all floating-point output modifiers are
- * ignored.
- *
- * Don't enable denormals for 32-bit floats, because:
- * - Floating-point output modifiers would be ignored by the hw.
- * - Some opcodes don't support denormals, such as v_mad_f32. We would
- * have to stop using those.
- * - GFX6 & GFX7 would be very slow.
- */
- config.float_mode |= V_00B028_FP_64_DENORMS;
-
size_t disasm_size = binary.disasm_string ? strlen(binary.disasm_string) : 0;
size_t llvm_ir_size = binary.llvm_ir_string ? strlen(binary.llvm_ir_string) : 0;
size_t alloc_size = sizeof(struct radv_shader_binary_legacy) + binary.code_size +
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index c30c22fbf7d..3e2966b7856 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -478,8 +478,65 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
{
bool scratch_enabled = config_in->scratch_bytes_per_wave > 0;
unsigned vgpr_comp_cnt = 0;
+ unsigned num_input_vgprs = info->num_input_vgprs;
+
+ if (stage == MESA_SHADER_FRAGMENT) {
+ num_input_vgprs = 0;
+ if (G_0286CC_PERSP_SAMPLE_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTER_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTROID_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_PERSP_PULL_MODEL_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 3;
+ if (G_0286CC_LINEAR_SAMPLE_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTER_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTROID_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_LINE_STIPPLE_TEX_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_POS_X_FLOAT_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_POS_Y_FLOAT_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_POS_Z_FLOAT_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_POS_W_FLOAT_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_FRONT_FACE_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_ANCILLARY_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_SAMPLE_COVERAGE_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_POS_FIXED_PT_ENA(config_in->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ }
+
+ unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs);
+ /* +3 for scratch wave offset and VCC */
+ unsigned num_sgprs = MAX2(config_in->num_sgprs, info->num_input_sgprs + 3);
*config_out = *config_in;
+ config_out->num_vgprs = num_vgprs;
+ config_out->num_sgprs = num_sgprs;
+
+ /* Enable 64-bit and 16-bit denormals, because there is no performance
+ * cost.
+ *
+ * If denormals are enabled, all floating-point output modifiers are
+ * ignored.
+ *
+ * Don't enable denormals for 32-bit floats, because:
+ * - Floating-point output modifiers would be ignored by the hw.
+ * - Some opcodes don't support denormals, such as v_mad_f32. We would
+ * have to stop using those.
+ * - GFX6 & GFX7 would be very slow.
+ */
+ config_out->float_mode |= V_00B028_FP_64_DENORMS;
config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
S_00B12C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5) |
@@ -490,10 +547,10 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
S_00B12C_SO_BASE3_EN(!!info->info.so.strides[3]) |
S_00B12C_SO_EN(!!info->info.so.num_outputs);
- config_out->rsrc1 = S_00B848_VGPRS((config_in->num_vgprs - 1) / 4) |
- S_00B848_SGPRS((config_in->num_sgprs - 1) / 8) |
+ config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / 4) |
+ S_00B848_SGPRS((num_sgprs - 1) / 8) |
S_00B848_DX10_CLAMP(1) |
- S_00B848_FLOAT_MODE(config_in->float_mode);
+ S_00B848_FLOAT_MODE(config_out->float_mode);
switch (stage) {
case MESA_SHADER_TESS_EVAL:
@@ -807,6 +864,11 @@ shader_variant_compile(struct radv_device *device,
return NULL;
}
+ if (options->dump_shader) {
+ fprintf(stderr, "disasm:\n%s\n", variant->disasm_string);
+ }
+
+
if (device->keep_shader_info) {
if (!gs_copy_shader && !module->nir) {
variant->nir = *shaders;