diff options
author | Samuel Pitoiset <samuel.pitoiset@gmail.com> | 2019-08-01 10:43:42 +0200 |
---|---|---|
committer | Samuel Pitoiset <samuel.pitoiset@gmail.com> | 2019-08-02 09:37:36 +0200 |
commit | 8a86908e9a79fb7ac94df1fa3194b54059a6045f (patch) | |
tree | e85df798586e6bdc2f944d95125622ec403c16cd | |
parent | 953bbacc23bbf8b107a49187abc88cc2f4b1be43 (diff) |
radv/gfx10: add Wave32 support for vertex, tessellation and geometry shaders
It can be enabled with RADV_PERFTEST=gewave32.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
-rw-r--r-- | src/amd/vulkan/radv_debug.h | 1 | ||||
-rw-r--r-- | src/amd/vulkan/radv_device.c | 5 | ||||
-rw-r--r-- | src/amd/vulkan/radv_nir_to_llvm.c | 13 | ||||
-rw-r--r-- | src/amd/vulkan/radv_pipeline.c | 10 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 1 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader.c | 3 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader.h | 1 |
7 files changed, 26 insertions, 8 deletions
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 65dbec6e90d..ef5b331d188 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -66,6 +66,7 @@ enum { RADV_PERFTEST_TC_COMPAT_CMASK = 0x80, RADV_PERFTEST_CS_WAVE_32 = 0x100, RADV_PERFTEST_PS_WAVE_32 = 0x200, + RADV_PERFTEST_GE_WAVE_32 = 0x400, }; bool diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index b66b15edf73..fc961040b6e 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -386,6 +386,7 @@ radv_physical_device_init(struct radv_physical_device *device, /* Determine the number of threads per wave for all stages. */ device->cs_wave_size = 64; device->ps_wave_size = 64; + device->ge_wave_size = 64; if (device->rad_info.chip_class >= GFX10) { if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32) @@ -394,6 +395,9 @@ radv_physical_device_init(struct radv_physical_device *device, /* For pixel shaders, wave64 is recommanded. */ if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32) device->ps_wave_size = 32; + + if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32) + device->ge_wave_size = 32; } radv_physical_device_init_mem_types(device); @@ -509,6 +513,7 @@ static const struct debug_control radv_perftest_options[] = { {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK}, {"cswave32", RADV_PERFTEST_CS_WAVE_32}, {"pswave32", RADV_PERFTEST_PS_WAVE_32}, + {"gewave32", RADV_PERFTEST_GE_WAVE_32}, {NULL, 0} }; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index bba5849b152..91251aa69bd 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -295,7 +295,7 @@ get_tcs_num_patches(struct radv_shader_context *ctx) /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */ if (ctx->options->chip_class == GFX6) { - unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); + unsigned one_wave = ctx->options->ge_wave_size / MAX2(num_tcs_input_cp, num_tcs_output_cp); num_patches = MIN2(num_patches, one_wave); } return num_patches; @@ -3038,7 +3038,8 @@ handle_es_outputs_post(struct radv_shader_context *ctx, LLVMValueRef wave_idx = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 24, 4); vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx, LLVMBuildMul(ctx->ac.builder, wave_idx, - LLVMConstInt(ctx->ac.i32, 64, false), ""), ""); + LLVMConstInt(ctx->ac.i32, + ctx->ac.wave_size, false), ""), ""); lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx, LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), ""); } @@ -3140,7 +3141,7 @@ static LLVMValueRef get_thread_id_in_tg(struct radv_shader_context *ctx) LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef tmp; tmp = LLVMBuildMul(builder, get_wave_id_in_tg(ctx), - LLVMConstInt(ctx->ac.i32, 64, false), ""); + LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false), ""); return LLVMBuildAdd(builder, tmp, ac_get_thread_id(&ctx->ac), ""); } @@ -4190,7 +4191,7 @@ ac_setup_rings(struct radv_shader_context *ctx) */ LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2); uint64_t stream_offset = 0; - unsigned num_records = 64; + unsigned num_records = ctx->ac.wave_size; LLVMValueRef base_ring; base_ring = @@ -4223,7 +4224,7 @@ ac_setup_rings(struct radv_shader_context *ctx) ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp, ctx->ac.i32_0, ""); - stream_offset += stride * 64; + stream_offset += stride * ctx->ac.wave_size; ring = LLVMBuildBitCast(ctx->ac.builder, ring, ctx->ac.v4i32, ""); @@ -4325,7 +4326,7 @@ radv_nir_shader_wave_size(struct nir_shader *const *shaders, int shader_count, return options->cs_wave_size; else if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT) return options->ps_wave_size; - return 64; + return options->ge_wave_size; } static diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index dbfe261c982..d722d558a43 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1782,7 +1782,7 @@ calculate_ngg_info(const VkGraphicsPipelineCreateInfo *pCreateInfo, /* Round up towards full wave sizes for better ALU utilization. */ if (!max_vert_out_per_gs_instance) { - const unsigned wavesize = 64; + const unsigned wavesize = pipeline->device->physical_device->ge_wave_size; unsigned orig_max_esverts; unsigned orig_max_gsprims; do { @@ -4125,6 +4125,14 @@ radv_compute_vgt_shader_stages_en(const struct radv_pipeline *pipeline) if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2); + if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 && + pipeline->device->physical_device->ge_wave_size == 32) { + /* legacy GS only supports Wave64 */ + stages |= S_028B54_HS_W32_EN(1) | + S_028B54_GS_W32_EN(radv_pipeline_has_ngg(pipeline)) | + S_028B54_VS_W32_EN(1); + } + return stages; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index a1347060190..1a0b22d63b8 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -304,6 +304,7 @@ struct radv_physical_device { /* Number of threads per wave. */ uint8_t ps_wave_size; uint8_t cs_wave_size; + uint8_t ge_wave_size; /* This is the drivers on-disk cache used as a fallback as opposed to * the pipeline cache defined by apps. diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 48ed86c99b1..97fa80b348c 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -675,7 +675,7 @@ radv_get_shader_wave_size(const struct radv_physical_device *pdevice, return pdevice->cs_wave_size; else if (stage == MESA_SHADER_FRAGMENT) return pdevice->ps_wave_size; - return 64; + return pdevice->ge_wave_size; } static void radv_postprocess_config(const struct radv_physical_device *pdevice, @@ -1144,6 +1144,7 @@ shader_variant_compile(struct radv_device *device, options->address32_hi = device->physical_device->rad_info.address32_hi; options->cs_wave_size = device->physical_device->cs_wave_size; options->ps_wave_size = device->physical_device->ps_wave_size; + options->ge_wave_size = device->physical_device->ge_wave_size; if (options->supports_spill) tm_options |= AC_TM_SUPPORTS_SPILL; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 0ef49628b5d..0ab7db20181 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -130,6 +130,7 @@ struct radv_nir_compiler_options { uint32_t address32_hi; uint8_t cs_wave_size; uint8_t ps_wave_size; + uint8_t ge_wave_size; }; enum radv_ud_index { |