summaryrefslogtreecommitdiff
path: root/include/linux/perf_event.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/perf_event.h')
-rw-r--r--include/linux/perf_event.h221
1 files changed, 172 insertions, 49 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ee8b9ecdc03b..c6a3bac76966 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -36,6 +36,7 @@ struct perf_guest_info_callbacks {
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <linux/rhashtable-types.h>
#include <asm/hw_breakpoint.h>
#endif
@@ -60,6 +61,7 @@ struct perf_guest_info_callbacks {
#include <linux/refcount.h>
#include <linux/security.h>
#include <linux/static_call.h>
+#include <linux/lockdep.h>
#include <asm/local.h>
struct perf_callchain_entry {
@@ -137,9 +139,11 @@ struct hw_perf_event_extra {
* PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
* usage.
*/
-#define PERF_EVENT_FLAG_ARCH 0x0000ffff
+#define PERF_EVENT_FLAG_ARCH 0x000fffff
#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000
+static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
+
/**
* struct hw_perf_event - performance event hardware details:
*/
@@ -178,7 +182,7 @@ struct hw_perf_event {
* creation and event initalization.
*/
struct arch_hw_breakpoint info;
- struct list_head bp_list;
+ struct rhlist_head bp_list;
};
#endif
struct { /* amd_iommu */
@@ -262,6 +266,7 @@ struct hw_perf_event {
};
struct perf_event;
+struct perf_event_pmu_context;
/*
* Common implementation detail of pmu::{start,commit,cancel}_txn
@@ -304,7 +309,7 @@ struct pmu {
int capabilities;
int __percpu *pmu_disable_count;
- struct perf_cpu_context __percpu *pmu_cpu_context;
+ struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;
@@ -439,7 +444,7 @@ struct pmu {
/*
* context-switches callback
*/
- void (*sched_task) (struct perf_event_context *ctx,
+ void (*sched_task) (struct perf_event_pmu_context *pmu_ctx,
bool sched_in);
/*
@@ -453,8 +458,8 @@ struct pmu {
* implementation and Perf core context switch handling callbacks for usage
* examples.
*/
- void (*swap_task_ctx) (struct perf_event_context *prev,
- struct perf_event_context *next);
+ void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc,
+ struct perf_event_pmu_context *next_epc);
/* optional */
/*
@@ -518,9 +523,10 @@ struct pmu {
/* optional */
/*
- * Filter events for PMU-specific reasons.
+ * Skip programming this PMU on the given CPU. Typically needed for
+ * big.LITTLE things.
*/
- int (*filter_match) (struct perf_event *event); /* optional */
+ bool (*filter) (struct pmu *pmu, int cpu); /* optional */
/*
* Check period value for PERF_EVENT_IOC_PERIOD ioctl.
@@ -631,7 +637,23 @@ struct pmu_event_list {
struct list_head list;
};
+/*
+ * event->sibling_list is modified whole holding both ctx->lock and ctx->mutex
+ * as such iteration must hold either lock. However, since ctx->lock is an IRQ
+ * safe lock, and is only held by the CPU doing the modification, having IRQs
+ * disabled is sufficient since it will hold-off the IPIs.
+ */
+#ifdef CONFIG_PROVE_LOCKING
+#define lockdep_assert_event_ctx(event) \
+ WARN_ON_ONCE(__lockdep_enabled && \
+ (this_cpu_read(hardirqs_enabled) && \
+ lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
+#else
+#define lockdep_assert_event_ctx(event)
+#endif
+
#define for_each_sibling_event(sibling, event) \
+ lockdep_assert_event_ctx(event); \
if ((event)->group_leader == (event)) \
list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
@@ -675,6 +697,11 @@ struct perf_event {
int group_caps;
struct perf_event *group_leader;
+ /*
+ * event->pmu will always point to pmu in which this event belongs.
+ * Whereas event->pmu_ctx->pmu may point to other pmu when group of
+ * different pmu events is created.
+ */
struct pmu *pmu;
void *pmu_private;
@@ -700,6 +727,12 @@ struct perf_event {
struct hw_perf_event hw;
struct perf_event_context *ctx;
+ /*
+ * event->pmu_ctx points to perf_event_pmu_context in which the event
+ * is added. This pmu_ctx can be of other pmu for sw event when that
+ * sw event is part of a group which also contains non-sw events.
+ */
+ struct perf_event_pmu_context *pmu_ctx;
atomic_long_t refcount;
/*
@@ -736,11 +769,14 @@ struct perf_event {
struct fasync_struct *fasync;
/* delayed work for NMIs and such */
- int pending_wakeup;
- int pending_kill;
- int pending_disable;
+ unsigned int pending_wakeup;
+ unsigned int pending_kill;
+ unsigned int pending_disable;
+ unsigned int pending_sigtrap;
unsigned long pending_addr; /* SIGTRAP */
- struct irq_work pending;
+ struct irq_work pending_irq;
+ struct callback_head pending_task;
+ unsigned int pending_work;
atomic_t event_limit;
@@ -789,19 +825,69 @@ struct perf_event {
#endif /* CONFIG_PERF_EVENTS */
};
+/*
+ * ,-----------------------[1:n]----------------------.
+ * V V
+ * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
+ * ^ ^ | |
+ * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-'
+ *
+ *
+ * struct perf_event_pmu_context lifetime is refcount based and RCU freed
+ * (similar to perf_event_context). Locking is as if it were a member of
+ * perf_event_context; specifically:
+ *
+ * modification, both: ctx->mutex && ctx->lock
+ * reading, either: ctx->mutex || ctx->lock
+ *
+ * There is one exception to this; namely put_pmu_ctx() isn't always called
+ * with ctx->mutex held; this means that as long as we can guarantee the epc
+ * has events the above rules hold.
+ *
+ * Specificially, sys_perf_event_open()'s group_leader case depends on
+ * ctx->mutex pinning the configuration. Since we hold a reference on
+ * group_leader (through the filedesc) it can't go away, therefore it's
+ * associated pmu_ctx must exist and cannot change due to ctx->mutex.
+ */
+struct perf_event_pmu_context {
+ struct pmu *pmu;
+ struct perf_event_context *ctx;
+
+ struct list_head pmu_ctx_entry;
+
+ struct list_head pinned_active;
+ struct list_head flexible_active;
+
+ /* Used to avoid freeing per-cpu perf_event_pmu_context */
+ unsigned int embedded : 1;
+
+ unsigned int nr_events;
+
+ atomic_t refcount; /* event <-> epc */
+ struct rcu_head rcu_head;
+
+ void *task_ctx_data; /* pmu specific data */
+ /*
+ * Set when one or more (plausibly active) event can't be scheduled
+ * due to pmu overcommit or pmu constraints, except tolerant to
+ * events not necessary to be active due to scheduling constraints,
+ * such as cgroups.
+ */
+ int rotate_necessary;
+};
struct perf_event_groups {
struct rb_root tree;
u64 index;
};
+
/**
* struct perf_event_context - event context structure
*
* Used as a container for task events and CPU events as well:
*/
struct perf_event_context {
- struct pmu *pmu;
/*
* Protect the states of the events in the list,
* nr_active, and the list:
@@ -814,27 +900,21 @@ struct perf_event_context {
*/
struct mutex mutex;
- struct list_head active_ctx_list;
+ struct list_head pmu_ctx_list;
struct perf_event_groups pinned_groups;
struct perf_event_groups flexible_groups;
struct list_head event_list;
- struct list_head pinned_active;
- struct list_head flexible_active;
-
int nr_events;
- int nr_active;
int nr_user;
int is_active;
+
+ int nr_task_data;
int nr_stat;
int nr_freq;
int rotate_disable;
- /*
- * Set when nr_events != nr_active, except tolerant to events not
- * necessary to be active due to scheduling constraints, such as cgroups.
- */
- int rotate_necessary;
- refcount_t refcount;
+
+ refcount_t refcount; /* event <-> ctx */
struct task_struct *task;
/*
@@ -855,8 +935,15 @@ struct perf_event_context {
#ifdef CONFIG_CGROUP_PERF
int nr_cgroups; /* cgroup evts */
#endif
- void *task_ctx_data; /* pmu specific data */
struct rcu_head rcu_head;
+
+ /*
+ * Sum (event->pending_sigtrap + event->pending_work)
+ *
+ * The SIGTRAP is targeted at ctx->task, as such it won't do changing
+ * that until the signal is delivered.
+ */
+ local_t nr_pending;
};
/*
@@ -865,12 +952,13 @@ struct perf_event_context {
*/
#define PERF_NR_CONTEXTS 4
-/**
- * struct perf_cpu_context - per cpu event context structure
- */
-struct perf_cpu_context {
- struct perf_event_context ctx;
- struct perf_event_context *task_ctx;
+struct perf_cpu_pmu_context {
+ struct perf_event_pmu_context epc;
+ struct perf_event_pmu_context *task_epc;
+
+ struct list_head sched_cb_entry;
+ int sched_cb_usage;
+
int active_oncpu;
int exclusive;
@@ -878,16 +966,20 @@ struct perf_cpu_context {
struct hrtimer hrtimer;
ktime_t hrtimer_interval;
unsigned int hrtimer_active;
+};
+
+/**
+ * struct perf_event_cpu_context - per cpu event context structure
+ */
+struct perf_cpu_context {
+ struct perf_event_context ctx;
+ struct perf_event_context *task_ctx;
+ int online;
#ifdef CONFIG_CGROUP_PERF
struct perf_cgroup *cgrp;
- struct list_head cgrp_cpuctx_entry;
#endif
- struct list_head sched_cb_entry;
- int sched_cb_usage;
-
- int online;
/*
* Per-CPU storage for iterators used in visit_groups_merge. The default
* storage is of size 2 to hold the CPU and any CPU event iterators.
@@ -951,6 +1043,8 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
#ifdef CONFIG_PERF_EVENTS
+extern struct perf_event_context *perf_cpu_task_ctx(void);
+
extern void *perf_aux_output_begin(struct perf_output_handle *handle,
struct perf_event *event);
extern void perf_aux_output_end(struct perf_output_handle *handle,
@@ -1007,18 +1101,20 @@ struct perf_sample_data {
* Fields set by perf_sample_data_init(), group so as to
* minimize the cachelines touched.
*/
- u64 addr;
- struct perf_raw_record *raw;
- struct perf_branch_stack *br_stack;
+ u64 sample_flags;
u64 period;
- union perf_sample_weight weight;
- u64 txn;
- union perf_mem_data_src data_src;
/*
* The other fields, optionally {set,used} by
* perf_{prepare,output}_sample().
*/
+ struct perf_branch_stack *br_stack;
+ union perf_sample_weight weight;
+ union perf_mem_data_src data_src;
+ u64 txn;
+ u64 addr;
+ struct perf_raw_record *raw;
+
u64 type;
u64 ip;
struct {
@@ -1056,13 +1152,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
u64 addr, u64 period)
{
/* remaining struct members initialized in perf_prepare_sample() */
- data->addr = addr;
- data->raw = NULL;
- data->br_stack = NULL;
+ data->sample_flags = PERF_SAMPLE_PERIOD;
data->period = period;
- data->weight.full = 0;
- data->data_src.val = PERF_MEM_NA;
- data->txn = 0;
+
+ if (addr) {
+ data->addr = addr;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
}
/*
@@ -1078,6 +1174,7 @@ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *b
br->abort = 0;
br->cycles = 0;
br->type = 0;
+ br->spec = PERF_BR_SPEC_NA;
br->reserved = 0;
}
@@ -1153,7 +1250,7 @@ static inline int is_software_event(struct perf_event *event)
*/
static inline int in_software_context(struct perf_event *event)
{
- return event->ctx->pmu->task_ctx_nr == perf_sw_context;
+ return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context;
}
static inline int is_exclusive_pmu(struct pmu *pmu)
@@ -1684,4 +1781,30 @@ static inline void perf_lopwr_cb(bool mode)
}
#endif
+#ifdef CONFIG_PERF_EVENTS
+static inline bool branch_sample_no_flags(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
+}
+
+static inline bool branch_sample_no_cycles(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
+}
+
+static inline bool branch_sample_type(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
+}
+
+static inline bool branch_sample_hw_index(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
+static inline bool branch_sample_priv(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
+}
+#endif /* CONFIG_PERF_EVENTS */
#endif /* _LINUX_PERF_EVENT_H */