diff options
author | Ian Romanick <ian.d.romanick@intel.com> | 2023-08-02 13:36:33 -0700 |
---|---|---|
committer | Ian Romanick <ian.d.romanick@intel.com> | 2023-12-29 20:28:54 -0800 |
commit | c6d44284aa633569a58200d00015b3e6d80a465a (patch) | |
tree | 731a9099f5d15f6ef797ae022800c25f4b38fa84 | |
parent | 8ea032b78ee3257fd9398db8b79cdf9ca5ff4a36 (diff) |
intel/dev: Enable VK_KHR_cooperative_matrix on all Gfx9+ GPUs
Gfx12.5 (DG2) will use DPAS instructions to accelerate the
implementation. Earlier platforms will use equivalent discrete
instructions (basically subgroup operations). Gfx12 (Tigerlake) will use
DP4A for 8-bit integer matrix multiplication. Older platforms, which
lack DP4A, will use a suboptimal instruction sequence. There is plenty
of room for improvement here.
On DG2 (Gfx12.5) gets the following results from the CTS:
Test run totals:
Passed: 1642/13982 (11.7%)
Failed: 0/13982 (0.0%)
Not supported: 12340/13982 (88.3%)
Warnings: 0/13982 (0.0%)
Waived: 0/13982 (0.0%)
On DG2 (Gfx12.5) with forced lowering, Raptor Lake (Gfx12) and Ice Lake
(Gfx11):
Test run totals:
Passed: 1662/13982 (11.9%)
Failed: 0/13982 (0.0%)
Not supported: 12320/13982 (88.1%)
Warnings: 0/13982 (0.0%)
Waived: 0/13982 (0.0%)
The difference in the number of tests run is due to
saturatingAccumulation not being set on DG2 when DPAS is used. There is
a comment in "intel/dev: Advertise integer configs with
saturatingAccumulation too" that explains how this could be added should
the need arise.
v2: Prefix type names with INTEL_CMAT_. Suggested by Lionel.
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25994>
-rw-r--r-- | src/intel/dev/intel_device_info.c | 30 |
1 files changed, 27 insertions, 3 deletions
diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c index 2da815e46d1..1ed313d71a2 100644 --- a/src/intel/dev/intel_device_info.c +++ b/src/intel/dev/intel_device_info.c @@ -614,7 +614,13 @@ static const struct intel_device_info intel_device_info_chv = { GFX8_FEATURES, \ GFX9_HW_INFO, \ .has_sample_with_hiz = true, \ - .has_illegal_ccs_values = true + .has_illegal_ccs_values = true, \ + .cooperative_matrix_configurations = { \ + { SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \ + { SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \ + { SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \ + { SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \ + } static const struct intel_device_info intel_device_info_skl_gt1 = { GFX9_FEATURES, .gt = 1, @@ -840,7 +846,13 @@ static const struct intel_device_info intel_device_info_cfl_gt3 = { .has_illegal_ccs_values = true, \ .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ .num_subslices = _subslices, \ - .max_eus_per_subslice = 8 + .max_eus_per_subslice = 8, \ + .cooperative_matrix_configurations = { \ + { SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \ + { SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \ + { SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \ + { SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \ + } #define GFX11_URB_MIN_MAX_ENTRIES \ .min_entries = { \ @@ -967,6 +979,12 @@ static const struct intel_device_info intel_device_info_ehl_2x4 = { .scanout = PAT_ENTRY(1, WC, NONE), \ .writeback_incoherent = PAT_ENTRY(0, WB, 2WAY), \ .writecombining = PAT_ENTRY(1, WC, NONE), \ + }, \ + .cooperative_matrix_configurations = { \ + { SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \ + { SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \ + { SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \ + { SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \ } #define dual_subslices(args...) { args, } @@ -1099,7 +1117,13 @@ static const struct intel_device_info intel_device_info_sg1 = { .has_lsc = true, \ .has_local_mem = true, \ .has_aux_map = false, \ - .simulator_id = 29 + .simulator_id = 29, \ + .cooperative_matrix_configurations = { \ + { SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \ + { SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \ + { SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 }, \ + { SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 }, \ + } #define DG2_FEATURES \ /* (Sub)slice info comes from the kernel topology info */ \ |