diff options
author | Matt Turner <mattst88@gmail.com> | 2022-01-31 13:16:26 -0800 |
---|---|---|
committer | Matt Turner <mattst88@gmail.com> | 2022-05-09 12:43:36 -0400 |
commit | 063863ddd63e5a8f3a5bdeb55e51e1a391b3630d (patch) | |
tree | 755471a283a37c1553cc8139eb025624791bea48 | |
parent | 39818465c16eeab5c88762edfd2fa9ad315cf91c (diff) |
intel/perf: Store indices to strings rather than pointers
The compiler does a good job of deduplicating strings already, but we
can eliminate the pointers to each string by combining the strings into
a single char array and storing only an index into that array.
The longest of the char arrays is the descriptions array, which is a
little over 45 KiB, so still under MSVC's 64 KiB string literal limit
[0]. Because every combined array is under 64 KiB, any offset into it fits in
a uint16_t index, which roughly doubles our savings as compared to an int.
This cuts 77 KiB from each of iris_dri.so (0.5%) and libvulkan_intel.so (0.9%).
text data bss dec hex filename
926811 25920 0 952731 e899b meson-generated_.._intel_perf_metrics.c.o (before)
924401 0 0 924401 e1af1 meson-generated_.._intel_perf_metrics.c.o (after)
text data bss dec hex filename
14190852 391628 210004 14792484 e1b724 iris_dri.so (before)
14137732 365708 210004 14713444 e08264 iris_dri.so (after)
text data bss dec hex filename
8184097 240184 22820 8447101 80e47d libvulkan_intel.so (before)
8131009 214264 22820 8368093 7fafdd libvulkan_intel.so (after)
relinfo:
iris_dri.so (before): 17765 relocations, 17545 relative (98%), 452 PLT entries, 1 for local syms (0%), 0 users
iris_dri.so (after) : 15605 relocations, 15385 relative (98%), 452 PLT entries, 1 for local syms (0%), 0 users
libvulkan_intel.so (before): 10720 relocations, 6989 relative (65%), 355 PLT entries, 1 for local syms (0%), 0 users
libvulkan_intel.so (after) : 8560 relocations, 4829 relative (56%), 355 PLT entries, 1 for local syms (0%), 0 users
[0] https://docs.microsoft.com/en-us/cpp/cpp/string-and-character-literals-cpp?view=msvc-170&viewFallbackFrom=vs-2019
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
(cherry picked from commit 6c0246dcf4f2d4e2ccdaa97d52833cf9f11ffa4b)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16405>
-rw-r--r-- | src/intel/perf/gen_perf.py | 74 | ||||
-rw-r--r-- | src/intel/perf/intel_perf_setup.h | 8 |
2 files changed, 68 insertions, 14 deletions
diff --git a/src/intel/perf/gen_perf.py b/src/intel/perf/gen_perf.py index b7ffbf88aea..0666f629163 100644 --- a/src/intel/perf/gen_perf.py +++ b/src/intel/perf/gen_perf.py @@ -20,6 +20,7 @@ # IN THE SOFTWARE. import argparse +import builtins import collections import os import sys @@ -414,7 +415,9 @@ def counter_key(counter): return counter_key_tuple._make([counter.get(field) for field in counter_key_tuple._fields]) -def output_counter_struct(set, counter, idx): +def output_counter_struct(set, counter, idx, + name_to_idx, desc_to_idx, + symbol_name_to_idx, category_to_idx): data_type = counter.data_type data_type_uc = data_type.upper() @@ -426,10 +429,10 @@ def output_counter_struct(set, counter, idx): c("[" + str(idx) + "] = {\n") c_indent(3) - c(".name = \"" + counter.name + "\",\n") - c(".desc = \"" + counter.description + " " + desc_units(counter.units) + "\",\n") - c(".symbol_name = \"" + counter.symbol_name + "\",\n") - c(".category = \"" + counter.mdapi_group + "\",\n") + c(".name_idx = " + str(name_to_idx[counter.name]) + ",\n") + c(".desc_idx = " + str(desc_to_idx[counter.description + " " + desc_units(counter.units)]) + ",\n") + c(".symbol_name_idx = " + str(symbol_name_to_idx[counter.symbol_name]) + ",\n") + c(".category_idx = " + str(category_to_idx[counter.mdapi_group]) + ",\n") c(".type = INTEL_PERF_COUNTER_TYPE_" + semantic_type_uc + ",\n") c(".data_type = INTEL_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ",\n") c(".units = INTEL_PERF_COUNTER_UNITS_" + output_units(counter.units) + ",\n") @@ -476,6 +479,29 @@ def output_counter_report(set, counter, counter_to_idx, current_offset): return current_offset + sizeof(c_type) +def str_to_idx_table(strs): + sorted_strs = sorted(strs) + + str_to_idx = collections.OrderedDict() + str_to_idx[sorted_strs[0]] = 0 + previous = sorted_strs[0] + + for i in range(1, len(sorted_strs)): + str_to_idx[sorted_strs[i]] = str_to_idx[previous] + len(previous) + 1 + previous = sorted_strs[i] + + return str_to_idx + + +def 
output_str_table(name: str, str_to_idx): + c("\n") + c("static const char " + name + "[] = {\n") + c_indent(3) + c("\n".join(f"/* {idx} */ \"{val}\\0\"" for val, idx in str_to_idx.items())) + c_outdent(3) + c("};\n") + + register_types = { 'FLEX': 'flex_regs', 'NOA': 'mux_regs', @@ -728,6 +754,30 @@ def main(): #include "perf/intel_perf_setup.h" """)) + names = builtins.set() + descs = builtins.set() + symbol_names = builtins.set() + categories = builtins.set() + for gen in gens: + for set in gen.sets: + for counter in set.counters: + names.add(counter.get('name')) + symbol_names.add(counter.get('symbol_name')) + descs.add(counter.get('description') + " " + desc_units(counter.get('units'))) + categories.add(counter.get('mdapi_group')) + + name_to_idx = str_to_idx_table(names) + output_str_table("name", name_to_idx) + + desc_to_idx = str_to_idx_table(descs) + output_str_table("desc", desc_to_idx) + + symbol_name_to_idx = str_to_idx_table(symbol_names) + output_str_table("symbol_name", symbol_name_to_idx) + + category_to_idx = str_to_idx_table(categories) + output_str_table("category", category_to_idx) + # Print out all equation functions. 
for gen in gens: for set in gen.sets: @@ -747,7 +797,11 @@ def main(): key = counter_key(counter) if key not in counter_to_idx: counter_to_idx[key] = idx - output_counter_struct(set, key, idx) + output_counter_struct(set, key, idx, + name_to_idx, + desc_to_idx, + symbol_name_to_idx, + category_to_idx) idx += 1 c_outdent(3) @@ -764,10 +818,10 @@ def main(): { const struct intel_perf_query_counter_data *counter = &counters[counter_idx]; - dest->name = counter->name; - dest->desc = counter->desc; - dest->symbol_name = counter->symbol_name; - dest->category = counter->category; + dest->name = &name[counter->name_idx]; + dest->desc = &desc[counter->desc_idx]; + dest->symbol_name = &symbol_name[counter->symbol_name_idx]; + dest->category = &category[counter->category_idx]; dest->raw_max = raw_max; dest->offset = offset; diff --git a/src/intel/perf/intel_perf_setup.h b/src/intel/perf/intel_perf_setup.h index 1f31c18c2d5..d481255d3f4 100644 --- a/src/intel/perf/intel_perf_setup.h +++ b/src/intel/perf/intel_perf_setup.h @@ -73,10 +73,10 @@ bdw_query_alloc(struct intel_perf_config *perf, int ncounters) } struct intel_perf_query_counter_data { - const char *name; - const char *desc; - const char *symbol_name; - const char *category; + uint16_t name_idx; + uint16_t desc_idx; + uint16_t symbol_name_idx; + uint16_t category_idx; enum intel_perf_counter_type type; enum intel_perf_counter_data_type data_type; enum intel_perf_counter_units units; |