summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/min_heap.h134
-rw-r--r--include/linux/perf_event.h19
-rw-r--r--include/uapi/linux/perf_event.h8
3 files changed, 160 insertions, 1 deletions
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
new file mode 100644
index 000000000000..44077837385f
--- /dev/null
+++ b/include/linux/min_heap.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MIN_HEAP_H
+#define _LINUX_MIN_HEAP_H
+
+#include <linux/bug.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+/**
+ * struct min_heap - Data structure to hold a min-heap.
+ * @data: Start of array holding the heap elements.
+ * @nr: Number of elements currently in the heap.
+ * @size: Maximum number of elements that can be held in current storage.
+ */
+struct min_heap {
+ void *data;
+ int nr;
+ int size;
+};
+
+/**
+ * struct min_heap_callbacks - Data/functions to customise the min_heap.
+ * @elem_size: The nr of each element in bytes.
+ * @less: Partial order function for this heap.
+ * @swp: Swap elements function.
+ */
+struct min_heap_callbacks {
+ int elem_size;
+ bool (*less)(const void *lhs, const void *rhs);
+ void (*swp)(void *lhs, void *rhs);
+};
+
+/* Sift the element at pos down the heap. */
+static __always_inline
+void min_heapify(struct min_heap *heap, int pos,
+ const struct min_heap_callbacks *func)
+{
+ void *left, *right, *parent, *smallest;
+ void *data = heap->data;
+
+ for (;;) {
+ if (pos * 2 + 1 >= heap->nr)
+ break;
+
+ left = data + ((pos * 2 + 1) * func->elem_size);
+ parent = data + (pos * func->elem_size);
+ smallest = parent;
+ if (func->less(left, smallest))
+ smallest = left;
+
+ if (pos * 2 + 2 < heap->nr) {
+ right = data + ((pos * 2 + 2) * func->elem_size);
+ if (func->less(right, smallest))
+ smallest = right;
+ }
+ if (smallest == parent)
+ break;
+ func->swp(smallest, parent);
+ if (smallest == left)
+ pos = (pos * 2) + 1;
+ else
+ pos = (pos * 2) + 2;
+ }
+}
+
+/* Floyd's approach to heapification that is O(nr). */
+static __always_inline
+void min_heapify_all(struct min_heap *heap,
+ const struct min_heap_callbacks *func)
+{
+ int i;
+
+ for (i = heap->nr / 2; i >= 0; i--)
+ min_heapify(heap, i, func);
+}
+
+/* Remove minimum element from the heap, O(log2(nr)). */
+static __always_inline
+void min_heap_pop(struct min_heap *heap,
+ const struct min_heap_callbacks *func)
+{
+ void *data = heap->data;
+
+ if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
+ return;
+
+ /* Place last element at the root (position 0) and then sift down. */
+ heap->nr--;
+ memcpy(data, data + (heap->nr * func->elem_size), func->elem_size);
+ min_heapify(heap, 0, func);
+}
+
+/*
+ * Remove the minimum element and then push the given element. The
+ * implementation performs 1 sift (O(log2(nr))) and is therefore more
+ * efficient than a pop followed by a push that does 2.
+ */
+static __always_inline
+void min_heap_pop_push(struct min_heap *heap,
+ const void *element,
+ const struct min_heap_callbacks *func)
+{
+ memcpy(heap->data, element, func->elem_size);
+ min_heapify(heap, 0, func);
+}
+
+/* Push an element on to the heap, O(log2(nr)). */
+static __always_inline
+void min_heap_push(struct min_heap *heap, const void *element,
+ const struct min_heap_callbacks *func)
+{
+ void *data = heap->data;
+ void *child, *parent;
+ int pos;
+
+ if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap"))
+ return;
+
+ /* Place at the end of data. */
+ pos = heap->nr;
+ memcpy(data + (pos * func->elem_size), element, func->elem_size);
+ heap->nr++;
+
+ /* Sift child at pos up. */
+ for (; pos > 0; pos = (pos - 1) / 2) {
+ child = data + (pos * func->elem_size);
+ parent = data + ((pos - 1) / 2) * func->elem_size;
+ if (func->less(parent, child))
+ break;
+ func->swp(parent, child);
+ }
+}
+
+#endif /* _LINUX_MIN_HEAP_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 547773f5894e..8768a39b5258 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -93,14 +93,26 @@ struct perf_raw_record {
/*
* branch stack layout:
* nr: number of taken branches stored in entries[]
+ * hw_idx: The low level index of raw branch records
+ * for the most recent branch.
+ * -1ULL means invalid/unknown.
*
* Note that nr can vary from sample to sample
* branches (to, from) are stored from most recent
* to least recent, i.e., entries[0] contains the most
* recent branch.
+ * The entries[] is an abstraction of raw branch records,
+ * which may not be stored in age order in HW, e.g. Intel LBR.
+ * The hw_idx is to expose the low level index of raw
+ * branch record for the most recent branch aka entries[0].
+ * The hw_idx index is between -1 (unknown) and max depth,
+ * which can be retrieved in /sys/devices/cpu/caps/branches.
+ * For the architectures whose raw branch records are
+ * already stored in age order, the hw_idx should be 0.
*/
struct perf_branch_stack {
__u64 nr;
+ __u64 hw_idx;
struct perf_branch_entry entries[0];
};
@@ -850,6 +862,13 @@ struct perf_cpu_context {
int sched_cb_usage;
int online;
+ /*
+ * Per-CPU storage for iterators used in visit_groups_merge. The default
+ * storage is of size 2 to hold the CPU and any CPU event iterators.
+ */
+ int heap_size;
+ struct perf_event **heap;
+ struct perf_event *heap_default[2];
};
struct perf_output_handle {
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 377d794d3105..397cfd65b3fe 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -181,6 +181,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */
+ PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -208,6 +210,8 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_TYPE_SAVE =
1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
@@ -853,7 +857,9 @@ enum perf_event_type {
* char data[size];}&& PERF_SAMPLE_RAW
*
* { u64 nr;
- * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+ * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
+ * { u64 from, to, flags } lbr[nr];
+ * } && PERF_SAMPLE_BRANCH_STACK
*
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER