summaryrefslogtreecommitdiff
path: root/src/compiler/nir/nir.h
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2019-03-19 20:55:30 +0000
committerRhys Perry <pendingchaos02@gmail.com>2019-11-25 13:59:11 +0000
commitce9205c03bd20d26af23ca891e97a9f848a612d1 (patch)
treeab6f968dc4f26b51e17bc134742816f89c4efea1 /src/compiler/nir/nir.h
parentb3a3e4d1d27d9df6b020489cf5aa00affdfbe107 (diff)
nir: add a load/store vectorization pass
This pass combines intersecting, adjacent and identical loads/stores into potentially larger ones and will be used by ACO to greatly reduce the number of memory operations. v2: handle nir_deref_type_ptr_as_array v3: assume explicitly laid out types for derefs v4: create less deref casts v4: fix shared boolean vectorization v4: fix copy+paste error in resources_different v4: fix extract_subvector() to pass nir_load_store_vectorize_test.ssbo_load_intersecting_32_32_64 v4: rebase v5: subtract from deref/offset instead of scheduling offset calculations v5: various non-functional changes/cleanups v5: require less metadata and preserve more v5: rebase v6: cleanup and improve dependency handling v6: emit less deref casts v6: pass undef to components not set in the write_mask for new stores v7: fix 8-bit extract_vector() with 64-bit input v7: cleanup creation of store write data v7: update align correctly for when the bit size of load/store increases v7: rename extract_vector to extract_component and update comment v8: prevent combining of row-major matrix column acceses v9: rework process_block() to be able to vectorize more v9: rework the callback function v9: update alignment on all loads/stores, even if they're not vectorized v9: remove entry::store_value, since it will not be updated if it's was from a vectorized load v9: fix bug in subtract_deref(), causing artifacts in Dishonored 2 v9: handle nir_intrinsic_scoped_memory_barrier v10: use nir_ssa_scalar v10: handle non-32-bit offsets v10: use signed offsets for comparison v10: improve create_entry_key_from_offset() v10: support load_shared/store_shared v10: remove strip_deref_casts() v10: don't ever pass NULL to memcmp v10: remove recursion in gcd() v10: fix outdated comment v11: use the new nir_extract_bits() v12: remove use of nir_src_as_const_value in resources_different v13: make entry key hash function deterministic v13: simplify mask_sign_extend() v14: add comment in hash_entry_key() about hashing pointers Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Connor Abbott <cwabbott0@gmail.com> (v9)
Diffstat (limited to 'src/compiler/nir/nir.h')
-rw-r--r--src/compiler/nir/nir.h7
1 files changed, 7 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index d9f3034cfe4..8b5e777246a 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4194,6 +4194,13 @@ bool nir_opt_vectorize(nir_shader *shader);
bool nir_opt_conditional_discard(nir_shader *shader);
+typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size,
+ unsigned num_components, unsigned high_offset,
+ nir_intrinsic_instr *low, nir_intrinsic_instr *high);
+
+bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
+ nir_should_vectorize_mem_func callback);
+
void nir_sweep(nir_shader *shader);
void nir_remap_dual_slot_attributes(nir_shader *shader,