intel/perf: Use a function to initialize perf counters

The function is explicitly marked ATTRIBUTE_NOINLINE; otherwise the
compiler inlines it, which actually increases code size slightly.

Cuts 505 KiB from each of iris_dri.so and libvulkan_intel.so.

   text    data     bss     dec     hex filename
1538720       0       0 1538720  177aa0 meson-generated_.._intel_perf_metrics.c.o (before)
 926811   43200       0  970011   ecd1b meson-generated_.._intel_perf_metrics.c.o (after)

   text    data     bss     dec     hex filename
14751700 365708  210004 15327412 e9e0b4 iris_dri.so (before)
14190852 408908  210004 14809764 e1faa4 iris_dri.so (after)

   text    data     bss     dec     hex filename
8744913  214264   22820 8981997  890ded libvulkan_intel.so (before)
8184097  257464   22820 8464381  8127fd libvulkan_intel.so (after)

Relocations increase because the counter initializations are moved from
code (in .text) to pointers (in .data) into .rodata, and those pointers
require relocations.

relinfo:
iris_dri.so (before): 15605 relocations, 15385 relative (98%), 452 PLT entries, 1 for local syms (0%), 0 users
iris_dri.so (after) : 17765 relocations, 17545 relative (98%), 452 PLT entries, 1 for local syms (0%), 0 users

libvulkan_intel.so (before):  8560 relocations, 4829 relative (56%), 355 PLT entries, 1 for local syms (0%), 0 users
libvulkan_intel.so (after) : 10720 relocations, 6989 relative (65%), 355 PLT entries, 1 for local syms (0%), 0 users

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15237>
This commit is contained in:
Matt Turner 2022-03-02 17:53:02 -08:00 committed by Marge Bot
parent 5e6c7a572e
commit bbbbb0325b

View File

@ -461,19 +461,13 @@ def output_counter_report(set, counter, counter_to_idx, current_offset):
key = counter_key(counter)
idx = str(counter_to_idx[key])
c("counter = &query->counters[query->n_counters++];\n")
c("counter->oa_counter_read_" + data_type + " = " + set.read_funcs[counter.get('symbol_name')] + ";\n")
c("counter->name = counters[" + idx + "].name;\n")
c("counter->desc = counters[" + idx + "].desc;\n")
c("counter->symbol_name = counters[" + idx + "].symbol_name;\n")
c("counter->category = counters[" + idx + "].category;\n")
c("counter->type = counters[" + idx + "].type;\n")
c("counter->data_type = counters[" + idx + "].data_type;\n")
c("counter->units = counters[" + idx + "].units;\n")
c("counter->raw_max = " + set.max_values[counter.get('symbol_name')] + ";\n")
current_offset = pot_align(current_offset, sizeof(c_type))
c("counter->offset = " + str(current_offset) + ";\n")
c("counter = &query->counters[query->n_counters++];\n")
c("intel_perf_query_add_counter(counter, " + idx + ", " +
str(current_offset) + ", " +
set.max_values[counter.get('symbol_name')] + ", (oa_counter_read_func)" +
set.read_funcs[counter.get('symbol_name')] + ");\n")
if availability:
c_outdent(3);
@ -757,7 +751,32 @@ def main():
idx += 1
c_outdent(3)
c("};\n")
c("};\n\n")
c(textwrap.dedent("""\
typedef uint64_t (*oa_counter_read_func)(struct intel_perf_config *perf,
const struct intel_perf_query_info *query,
const struct intel_perf_query_result *results);
static void ATTRIBUTE_NOINLINE
intel_perf_query_add_counter(struct intel_perf_query_counter *dest,
int counter_idx, size_t offset,
uint64_t raw_max, oa_counter_read_func oa_counter_read_uint64)
{
const struct intel_perf_query_counter *counter = &counters[counter_idx];
dest->name = counter->name;
dest->desc = counter->desc;
dest->symbol_name = counter->symbol_name;
dest->category = counter->category;
dest->raw_max = raw_max;
dest->offset = offset;
dest->type = counter->type;
dest->data_type = counter->data_type;
dest->units = counter->units;
dest->oa_counter_read_uint64 = oa_counter_read_uint64;
}
"""))
# Print out all metric sets registration functions for each set in each
# generation.