diff options
Diffstat (limited to 'tools/perf/util/auxtrace.c')
-rw-r--r-- | tools/perf/util/auxtrace.c | 88 |
1 files changed, 84 insertions, 4 deletions
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index f33f09b8b535..8d2865b9ade2 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1669,6 +1669,82 @@ int perf_event__process_auxtrace_error(struct perf_session *session, return 0; } +/* + * In the compat mode kernel runs in 64-bit and perf tool runs in 32-bit mode, + * 32-bit perf tool cannot access 64-bit value atomically, which might lead to + * the issues caused by the below sequence on multiple CPUs: when perf tool + * accesses either the load operation or the store operation for 64-bit value, + * on some architectures the operation is divided into two instructions, one + * is for accessing the low 32-bit value and another is for the high 32-bit; + * thus these two user operations can give the kernel chances to access the + * 64-bit value, and thus leads to the unexpected load values. + * + * kernel (64-bit) user (32-bit) + * + * if (LOAD ->aux_tail) { --, LOAD ->aux_head_lo + * STORE $aux_data | ,---> + * FLUSH $aux_data | | LOAD ->aux_head_hi + * STORE ->aux_head --|-------` smp_rmb() + * } | LOAD $data + * | smp_mb() + * | STORE ->aux_tail_lo + * `-----------> + * STORE ->aux_tail_hi + * + * For this reason, it's impossible for the perf tool to work correctly when + * the AUX head or tail is bigger than 4GB (more than 32 bits length); and we + * can not simply limit the AUX ring buffer to less than 4GB, the reason is + * the pointers can be increased monotonically, whatever the buffer size it is, + * at the end the head and tail can be bigger than 4GB and carry out to the + * high 32-bit. + * + * To mitigate the issues and improve the user experience, we can allow the + * perf tool working in certain conditions and bail out with error if detect + * any overflow cannot be handled. + * + * For reading the AUX head, it reads out the values for three times, and + * compares the high 4 bytes of the values between the first time and the last + * time, if there has no change for high 4 bytes injected by the kernel during + * the user reading sequence, it's safe for use the second value. + * + * When compat_auxtrace_mmap__write_tail() detects any carrying in the high + * 32 bits, it means there have two store operations in user space and it cannot + * promise the atomicity for 64-bit write, so return '-1' in this case to tell + * the caller an overflow error has happened. + */ +u64 __weak compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm) +{ + struct perf_event_mmap_page *pc = mm->userpg; + u64 first, second, last; + u64 mask = (u64)(UINT32_MAX) << 32; + + do { + first = READ_ONCE(pc->aux_head); + /* Ensure all reads are done after we read the head */ + smp_rmb(); + second = READ_ONCE(pc->aux_head); + /* Ensure all reads are done after we read the head */ + smp_rmb(); + last = READ_ONCE(pc->aux_head); + } while ((first & mask) != (last & mask)); + + return second; +} + +int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail) +{ + struct perf_event_mmap_page *pc = mm->userpg; + u64 mask = (u64)(UINT32_MAX) << 32; + + if (tail & mask) + return -1; + + /* Ensure all reads are done before we write the tail out */ + smp_mb(); + WRITE_ONCE(pc->aux_tail, tail); + return 0; +} + static int __auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, @@ -1680,8 +1756,9 @@ static int __auxtrace_mmap__read(struct mmap *map, size_t size, head_off, old_off, len1, len2, padding; union perf_event ev; void *data1, *data2; + int kernel_is_64_bit = perf_env__kernel_is_64_bit(evsel__env(NULL)); - head = auxtrace_mmap__read_head(mm); + head = auxtrace_mmap__read_head(mm, kernel_is_64_bit); if (snapshot && auxtrace_record__find_snapshot(itr, mm->idx, mm, data, &head, &old)) @@ -1764,10 +1841,13 @@ static int __auxtrace_mmap__read(struct mmap *map, mm->prev = head; if (!snapshot) { - auxtrace_mmap__write_tail(mm, head); - if (itr->read_finish) { - int err; + int err; + + err = auxtrace_mmap__write_tail(mm, head, kernel_is_64_bit); + if (err < 0) + return err; + if (itr->read_finish) { err = itr->read_finish(itr, mm->idx); if (err < 0) return err; |