diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/min_heap.h | 134 | ||||
-rw-r--r-- | include/linux/perf_event.h | 19 | ||||
-rw-r--r-- | include/uapi/linux/perf_event.h | 8 |
3 files changed, 160 insertions, 1 deletions
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h new file mode 100644 index 000000000000..44077837385f --- /dev/null +++ b/include/linux/min_heap.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MIN_HEAP_H +#define _LINUX_MIN_HEAP_H + +#include <linux/bug.h> +#include <linux/string.h> +#include <linux/types.h> + +/** + * struct min_heap - Data structure to hold a min-heap. + * @data: Start of array holding the heap elements. + * @nr: Number of elements currently in the heap. + * @size: Maximum number of elements that can be held in current storage. + */ +struct min_heap { + void *data; + int nr; + int size; +}; + +/** + * struct min_heap_callbacks - Data/functions to customise the min_heap. + * @elem_size: The nr of each element in bytes. + * @less: Partial order function for this heap. + * @swp: Swap elements function. + */ +struct min_heap_callbacks { + int elem_size; + bool (*less)(const void *lhs, const void *rhs); + void (*swp)(void *lhs, void *rhs); +}; + +/* Sift the element at pos down the heap. */ +static __always_inline +void min_heapify(struct min_heap *heap, int pos, + const struct min_heap_callbacks *func) +{ + void *left, *right, *parent, *smallest; + void *data = heap->data; + + for (;;) { + if (pos * 2 + 1 >= heap->nr) + break; + + left = data + ((pos * 2 + 1) * func->elem_size); + parent = data + (pos * func->elem_size); + smallest = parent; + if (func->less(left, smallest)) + smallest = left; + + if (pos * 2 + 2 < heap->nr) { + right = data + ((pos * 2 + 2) * func->elem_size); + if (func->less(right, smallest)) + smallest = right; + } + if (smallest == parent) + break; + func->swp(smallest, parent); + if (smallest == left) + pos = (pos * 2) + 1; + else + pos = (pos * 2) + 2; + } +} + +/* Floyd's approach to heapification that is O(nr). */ +static __always_inline +void min_heapify_all(struct min_heap *heap, + const struct min_heap_callbacks *func) +{ + int i; + + for (i = heap->nr / 2; i >= 0; i--) + min_heapify(heap, i, func); +} + +/* Remove minimum element from the heap, O(log2(nr)). */ +static __always_inline +void min_heap_pop(struct min_heap *heap, + const struct min_heap_callbacks *func) +{ + void *data = heap->data; + + if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) + return; + + /* Place last element at the root (position 0) and then sift down. */ + heap->nr--; + memcpy(data, data + (heap->nr * func->elem_size), func->elem_size); + min_heapify(heap, 0, func); +} + +/* + * Remove the minimum element and then push the given element. The + * implementation performs 1 sift (O(log2(nr))) and is therefore more + * efficient than a pop followed by a push that does 2. + */ +static __always_inline +void min_heap_pop_push(struct min_heap *heap, + const void *element, + const struct min_heap_callbacks *func) +{ + memcpy(heap->data, element, func->elem_size); + min_heapify(heap, 0, func); +} + +/* Push an element on to the heap, O(log2(nr)). */ +static __always_inline +void min_heap_push(struct min_heap *heap, const void *element, + const struct min_heap_callbacks *func) +{ + void *data = heap->data; + void *child, *parent; + int pos; + + if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) + return; + + /* Place at the end of data. */ + pos = heap->nr; + memcpy(data + (pos * func->elem_size), element, func->elem_size); + heap->nr++; + + /* Sift child at pos up. */ + for (; pos > 0; pos = (pos - 1) / 2) { + child = data + (pos * func->elem_size); + parent = data + ((pos - 1) / 2) * func->elem_size; + if (func->less(parent, child)) + break; + func->swp(parent, child); + } +} + +#endif /* _LINUX_MIN_HEAP_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 547773f5894e..8768a39b5258 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -93,14 +93,26 @@ struct perf_raw_record { /* * branch stack layout: * nr: number of taken branches stored in entries[] + * hw_idx: The low level index of raw branch records + * for the most recent branch. + * -1ULL means invalid/unknown. * * Note that nr can vary from sample to sample * branches (to, from) are stored from most recent * to least recent, i.e., entries[0] contains the most * recent branch. + * The entries[] is an abstraction of raw branch records, + * which may not be stored in age order in HW, e.g. Intel LBR. + * The hw_idx is to expose the low level index of raw + * branch record for the most recent branch aka entries[0]. + * The hw_idx index is between -1 (unknown) and max depth, + * which can be retrieved in /sys/devices/cpu/caps/branches. + * For the architectures whose raw branch records are + * already stored in age order, the hw_idx should be 0. */ struct perf_branch_stack { __u64 nr; + __u64 hw_idx; struct perf_branch_entry entries[0]; }; @@ -850,6 +862,13 @@ struct perf_cpu_context { int sched_cb_usage; int online; + /* + * Per-CPU storage for iterators used in visit_groups_merge. The default + * storage is of size 2 to hold the CPU and any CPU event iterators. + */ + int heap_size; + struct perf_event **heap; + struct perf_event *heap_default[2]; }; struct perf_output_handle { diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 377d794d3105..397cfd65b3fe 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -181,6 +181,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ + PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -208,6 +210,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -853,7 +857,9 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 nr; - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX + * { u64 from, to, flags } lbr[nr]; + * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER |